Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

crashes in ieee80211_setup_ratetable #320

Open
flameboss opened this issue Jun 23, 2020 · 10 comments
Open

crashes in ieee80211_setup_ratetable #320

flameboss opened this issue Jun 23, 2020 · 10 comments

Comments

@flameboss
Copy link

In order to understand why customers were having connection problems, I integrated code from Arduino and other sources to report crashes back to our server. And I've seen over 1000 crash reports in just a few weeks. I'm attaching the code I use to save the exception info to flash.

The most common crash happens when ieee80211_setup_ratetable is calling memcpy.

How can I troubleshoot this?

Here is the info from printstack.sh

Fatal exception 29
STORE_PROHIBITED
pc=0x4000df64 sp=0x3ffffbe0 excvaddr=0x00000000
ps=0x00000030 sar=0x00000004 vpri=0x4022426b
r00: 0x400018bc=1073748156 r01: 0x3ffffbe0=1073740768 r02: 0x00000000= 0
r03: 0x4024f304=1076163332 r04: 0x000000d4= 212 r05: 0x00000000= 0
r06: 0x0000000c= 12 r07: 0x00000000= 0 r08: 0x4024f3d4=1076163540
r09: 0x00000000= 0 r10: 0x00000000= 0 r11: 0x00000000= 0
r12: 0x00000000= 0 r13: 0x3fff4720=1073694496 r14: 0x00000016= 22
r15: 0x3fff4074=1073692788
section 1
type 1
len 976

Stack dump x3ffffbe0 x3d0:
3ffffbe0: 4000df64 00000030 00000004 4022426b
3ffffbf0: 3fff4720 00000016 3fff4074 0000001d
3ffffc00: 00000000 3fff6660 00000000 4024b7b8
3ffffc10: 00000000 3fffbff0 3fff96c8 401077a8
3ffffc20: 4020769b 00000000 00000020 4010522e
3ffffc30: 3fff4720 3ffffdb0 3ffffdb0 00000000
3ffffc40: 3fff1958 3fff40c0 ffffffcf 3fff1968
3ffffc50: 40000f58 00000000 000088c0 00000000
3ffffc60: 00000000 00000000 3fff1a1d 3fff19e3
3ffffc70: 00000000 3fff1958 3fff37f0 3fff1958
3ffffc80: 00000080 00000080 00000000 3fffdcb0
3ffffc90: 3ffffed8 4010027d 000000b4 00000002
3ffffca0: 3ffe8f90 00006274 3ffef1fc 00000001
3ffffcb0: 00006274 5ffffe00 00000001 401000a0
3ffffcc0: 4a4ef8f8 383c4046 01010000 05040302
3ffffcd0: 00000000 00000000 00000000 00000000
3ffffce0: 00000000 00000000 00000000 00000000
3ffffcf0: 00000000 00000000 00000000 00000000
3ffffd00: 00000000 00000000 00000000 00000000
3ffffd10: 00000000 00000000 00000000 00000000

0x4000df64: ?? ??:0
0x00000000: ?? ??:0
0x4020769b: ieee80211_setup_ratetable at ??:?
0x40000f58: ?? ??:0
0x3ffffed8: ?? ??:0
0x00006274: ?? ??:0

@flameboss
Copy link
Author

flameboss commented Jun 23, 2020

crash.c

/*
 * Handle crashes
 * Code adapted from Arduino ESP8266, esp-gdbstub and Espruino
 * https://gitlab.fai.utb.cz/jurenat/Espruino/commit/30aae54c80b6958d22447fc76f6d15d4c9da81c0
 */

#include <crash.h>
#include <bsp.h>
#include <xtensa/corebits.h>


struct XTensa_exception_frame_s {
    uint32_t pc;
    uint32_t ps;
    uint32_t sar;
    uint32_t vpri;
    uint32_t a0;
    uint32_t a[14]; //a2..a15
//These are added manually by the exception code; the HAL doesn't set these on an exception.
    uint32_t litbase;
    uint32_t sr176;
    uint32_t sr208;
    uint32_t a1;
    uint32_t reason;
    uint32_t excvaddr;
};

struct XTensa_exception_frame_s gdbstub_savedRegs;

void _xtos_set_exception_handler(int cause, void (exhandler)(struct XTensa_exception_frame_s *frame));

void gdbstub_save_extra_sfrs_for_exception();

uint8_t crash_in_exception;


static uint32_t crash_dump_len(uint32_t start, uint32_t end)
{
    uint32_t stack_len = end - start;
    uint32_t image_len =    sizeof(crash_header_st) +
                            sizeof(crash_sec_header_st) * 2 +
                            sizeof(gdbstub_savedRegs) +
                            stack_len;
    return image_len;
}

static void crash_dump_write(int fd, uint32_t in_faddr, uint32_t start, uint32_t end)
{
    uint32_t faddr = in_faddr;
    crash_header_st hdr;
    crash_sec_header_st sec_hdr;
    int32_t stack_len = end - start;

    hdr.version = CRASH_IMAGE_VERSION;
    hdr.num_sections = 2;
    hdr.hw_id = hw_id;
    hdr.pos = system_upgrade_userbin_check();
    hdr.fw_ver = VERSION32;
    ffs_program(fd, faddr, &hdr, sizeof(hdr));
    faddr += sizeof(hdr);

    sec_hdr.sec_type = CRASH_SEC_REGS;
    memset(sec_hdr.reserved, 0, sizeof(sec_hdr.reserved));
    sec_hdr.addr = 0;
    sec_hdr.len = sizeof(gdbstub_savedRegs);
    ffs_program(fd, faddr, &sec_hdr, sizeof(sec_hdr));
    faddr += sizeof(sec_hdr);

    ffs_program(fd, faddr, &gdbstub_savedRegs, sizeof(gdbstub_savedRegs));
    faddr += sizeof(gdbstub_savedRegs);

    sec_hdr.sec_type = CRASH_SEC_MEM;
    memset(sec_hdr.reserved, 0, sizeof(sec_hdr.reserved));
    sec_hdr.addr = start;
    sec_hdr.len = end - start;
    ffs_program(fd, faddr, &sec_hdr, sizeof(sec_hdr));
    faddr += sizeof(sec_hdr);

    ffs_program(fd, faddr, (void *)start, sec_hdr.len);
    faddr += sec_hdr.len;
    printf("core dumped slot %d, len %d\n", EepromCopyApp.Error_Cnt, faddr - in_faddr);
}

static void crash_dump(uint32_t start, uint32_t end)
{
    ffs_device_t fd;
    ffs_file_t file;
    ffs_file_id_et fids[] = {FLASH_DATA_LOG, FLASH_CORE_DUMP_0, FLASH_CORE_DUMP_1};

    fd = FLASH_SPI;
    ffs_remove_files(fd, fids, 3);

    file.id = FLASH_CORE_DUMP_0 + EepromCopyApp.Error_Cnt - 1;
    file.len = crash_dump_len(start, end);
    file.load_addr = 0;
    if (ffs_add_file(fd, &file)) {
        return;
    }

    ffs_erase(fd, file.faddr, file.len);

    crash_dump_write(fd, file.faddr, start, end);

    EepromCopyApp.Ram_Dumped = EepromCopyApp.Error_Cnt;
    EepromCopyApp.Ram_Dumped_fd = fd;
    config_save();
}

//Get the value of one of the A registers
static unsigned int getaregval(int reg) {
    if (reg==0) return gdbstub_savedRegs.a0;
    if (reg==1) return gdbstub_savedRegs.a1;
    return gdbstub_savedRegs.a[reg-2];
}

static void print_stack(uint32_t start, uint32_t end)
{
    uint32_t pos;
    printf("\nStack dump x%x x%x:\n", start, end);
    for (pos = start; pos < end; pos += 0x10) {
        uint32_t *values = (uint32_t*) (pos);
        // rough indicator: stack frames usually have SP saved as the second word
        bool looksLikeStackFrame = (values[2] == pos + 0x10);

        printf("%08lx:  %08lx %08lx %08lx %08lx %c\n", pos, values[0],
                values[1], values[2], values[3],
                (looksLikeStackFrame) ? '<' : ' ');
    }
    printf("\n");
}

static void print_reason()
{
    int i;
    struct XTensa_exception_frame_s *reg = &gdbstub_savedRegs;
    printf("Fatal exception %ld\n", reg->reason);
    printf("pc=0x%08lx sp=0x%08lx excvaddr=0x%08lx\n", reg->pc, reg->a1,
            reg->excvaddr);
    printf("ps=0x%08lx sar=0x%08lx vpri=0x%08lx\n", reg->ps, reg->sar,
            reg->vpri);
    for (i = 0; i < 16; i++) {
        unsigned int r = getaregval(i);
        printf("r%02d: 0x%08x=%10d ", i, r, r);
        if (i % 3 == 2)
            printf("\n");
    }
    printf("\n");
    print_stack(getaregval(1), 0x3fffffb0);
}

//Non-OS exception handler. Gets called by the Xtensa HAL.
static void exception_handler(struct XTensa_exception_frame_s *frame)
{
    gdbstub_save_extra_sfrs_for_exception();

    os_memcpy(&gdbstub_savedRegs, frame, 19 * 4);

    //Credits go to Cesanta for this trick. A1 seems to be destroyed, but because it
    //has a fixed offset from the address of the passed frame, we can recover it.
    //gdbstub_savedRegs.a1=(uint32_t)frame+EXCEPTION_GDB_SP_OFFSET;
    gdbstub_savedRegs.a1 = (uint32_t) frame;

    ets_wdt_disable();

    print_reason();

    if (crash_in_exception++ == 0 &&
            EepromCopyApp.Error_Cnt < CONFIG_ERROR_LOG_CNT &&
            !config_in_assertion_failure) {
        config_log_error("exception", __LINE__, gdbstub_savedRegs.pc, gdbstub_savedRegs.excvaddr);
        crash_dump(getaregval(1), 0x3fffffb0);
    }
    BSP_RESET();
}

static void ICACHE_FLASH_ATTR install_exceptions() {
    int i;
    int exno[]={EXCCAUSE_ILLEGAL, EXCCAUSE_SYSCALL, EXCCAUSE_INSTR_ERROR, EXCCAUSE_LOAD_STORE_ERROR,
            EXCCAUSE_DIVIDE_BY_ZERO, EXCCAUSE_UNALIGNED, EXCCAUSE_INSTR_DATA_ERROR, EXCCAUSE_LOAD_STORE_DATA_ERROR,
            EXCCAUSE_INSTR_ADDR_ERROR, EXCCAUSE_LOAD_STORE_ADDR_ERROR, EXCCAUSE_INSTR_PROHIBITED,
            EXCCAUSE_LOAD_PROHIBITED, EXCCAUSE_STORE_PROHIBITED};
    for (i=0; i<(sizeof(exno)/sizeof(exno[0])); i++) {
        _xtos_set_exception_handler(exno[i], exception_handler);
    }
}
void ICACHE_FLASH_ATTR crash_init()
{
    install_exceptions();
}

/** called after an assertion failure up to twice per firmware release */
void wifi_hw_dump_core()
{
    /* cause an exception */
    __asm__ __volatile__ ("syscall");
}

@flameboss
Copy link
Author

flameboss commented Jun 23, 2020

[Edit] Sorry I don't know how to insert code without it getting messed up by the markdown...
crash.h

#ifndef CRASH_H_
#define CRASH_H_

#include <stdint.h>

#define CRASH_IMAGE_VERSION   0

typedef enum __attribute__((__packed__)) {
    CRASH_SEC_REGS = 0,
    CRASH_SEC_MEM = 1
} crash_sec_type_et;

typedef struct {
    uint8_t version;
    uint8_t num_sections;
    uint8_t hw_id;
    uint8_t pos;
    uint32_t fw_ver;
} crash_header_st;

typedef struct {
    crash_sec_type_et sec_type;
    uint8_t reserved[3];
    uint32_t addr;
    uint32_t len;
} crash_sec_header_st;


void crash_init(void);

#endif /* CRASH_H_ */

@kriegste
Copy link

First mark the code, then hit the "code" button.

@flameboss
Copy link
Author

This is in a closed source part of the SDK, but I need to solve this. I'm in the middle of porting our whole app to the RTOS SDK, but I'd like some feedback on this crash. Will RTOS SDK fix it? Is anyone available to provide support or consulting?

@FayeY
Copy link
Collaborator

FayeY commented Jul 9, 2020

Could you have a try with the latest master? It should have solved this issue.
If this issue persists, please feel free to let us know.
Thanks.

@flameboss
Copy link
Author

Thank you! We will give it a try now.

@flameboss
Copy link
Author

I'm sorry but that got worse for me. I do not know how to reproduce the crash - I only know that many of our customers are getting it - but now I got exceptions as soon as it started associating with an AP:

Fatal exception 3
pc=0x402078d7 sp=0x3ffffc10 excvaddr=0x4024f5f4
ps=0x00000030 sar=0x0000001e vpri=0x3fff07ac
r00: 0x40207883=1075869827 r01: 0x3ffffc10=1073740816 r02: 0x00000002= 2
r03: 0x0000000b= 11 r04: 0x3fff4174=1073693044 r05: 0x4024f5f4=1076164084
r06: 0x3fff3ff4=1073692660 r07: 0x3ffffd10=1073741072 r08: 0x00000001= 1
r09: 0x3fff1ac0=1073683136 r10: 0x00000016= 22 r11: 0x01205355= 18895701
r12: 0x00000002= 2 r13: 0x3fff4860=1073694816 r14: 0x00000000= 0
r15: 0x3fff4860=1073694816

0x402078d7: ieee80211_setup_ratetable at ??:?
0x40207883: ieee80211_phy_init at ??:?
0x4024f5f4: chip_v6_unset_chanfreq at ??:?
0x4000dd3b: ?? ??:0
0x40002ef1: ?? ??:0
0x00000005: ?? ??:0
0x3ffefef5: ?? ??:0
0x40204d1c: ieee80211_setup_rates at ??:?
0x4020b4f4: cnx_update_bss_more at ??:?
0x4020b511: cnx_update_bss_more at ??:?
0x40208f87: scan_parse_beacon at ??:?
0x40204e09: ieee80211_parse_beacon at ??:?
0x40000f58: ?? ??:0
0x40100a81: wdt_feed at ??:?
0x40238f9b: ppPeocessRxPktHdr at ??:?
0x40000e19: ?? ??:0
0x40100283: call_user_start_local at ??:?
0x4010000d: call_user_start at ??:?

st0ff3r pushed a commit to nabovarme/MeterLogger that referenced this issue Oct 31, 2020
@kriegste
Copy link

#345

@FayeY
Copy link
Collaborator

FayeY commented Aug 6, 2021

May I ask if this issue still persists with the latest SDK?

@kriegste
Copy link

kriegste commented Aug 6, 2021

According to
#345
the cause is still present.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants