diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 000000000..4648c7cf7 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,77 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL" + +on: + push: + branches: [ "master" ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ "master" ] + schedule: + - cron: '45 14 * * 3' + +jobs: + analyze: + name: Analyze + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'cpp', 'javascript', 'python', 'ruby' ] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ] + # Use only 'java' to analyze code written in Java, Kotlin or both + # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both + # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + + # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v2 + + # ℹī¸ Command-line programs to run using the OS shell. + # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + + # If the Autobuild fails above, remove it and uncomment the following three lines. + # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. + + # - run: | + # echo "Run, Build Application using script" + # ./location_of_script_within_repo/buildscript.sh + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v2 + with: + category: "/language:${{matrix.language}}" diff --git a/CMakeLists.txt b/CMakeLists.txt index ec620b7e2..75ffe79b8 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(cmark-gfm) set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 29) set(PROJECT_VERSION_PATCH 0) -set(PROJECT_VERSION_GFM 11) +set(PROJECT_VERSION_GFM 13) set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM}) include("FindAsan.cmake") diff --git a/api_test/main.c b/api_test/main.c index e950451ba..e0fe02968 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -836,7 +836,7 @@ static void test_continuation_byte(test_batch_runner *runner, char *html = cmark_markdown_to_html(buf, strlen(buf), CMARK_OPT_VALIDATE_UTF8); - STR_EQ(runner, html, expected, "invalid utf8 continuation byte %d/%d", pos, + STR_EQ(runner, html, expected, "invalid utf8 continuation byte %zu/%zu", pos, len); free(html); } diff --git a/changelog.txt b/changelog.txt index 848e71029..94e8a19fd 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,15 @@ +[0.29.0.gfm.13] + * Normalized marker row vs. delimiter row nomenclature (#273) + * Exposed CMARK_NODE_FOOTNOTE_DEFINITION literal value (#336) + * Fix format specifier for printing a size_t (#340) + +[0.29.0.gfm.12] + + * Fixed polynomial time complexity issues per + https://github.com/github/cmark-gfm/security/advisories/GHSA-w4qg-3vf7-m9x5 + * Added CodeQL project integration (#337) + * Addressed const qualifier discard compiler warnings (#330, #331) + [0.29.0.gfm.11] * Improved fixes for polynomial time complexity issues per diff --git a/extensions/autolink.c b/extensions/autolink.c index 491d96c32..d898cd2d4 100644 --- a/extensions/autolink.c +++ b/extensions/autolink.c @@ -296,7 +296,7 @@ static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser, // inline was finished in inlines.c. } -static bool validate_protocol(char protocol[], uint8_t *data, size_t rewind, size_t max_rewind) { +static bool validate_protocol(const char protocol[], uint8_t *data, size_t rewind, size_t max_rewind) { size_t len = strlen(protocol); if (len > (max_rewind - rewind)) { diff --git a/extensions/table.c b/extensions/table.c index 337dcf02f..6c7a2bbae 100644 --- a/extensions/table.c +++ b/extensions/table.c @@ -11,6 +11,9 @@ #include "table.h" #include "cmark-gfm-core-extensions.h" +// Limit to prevent a malicious input from causing a denial of service. +#define MAX_AUTOCOMPLETED_CELLS 0x80000 + // Custom node flag, initialized in `create_table_extension`. static cmark_node_internal_flags CMARK_NODE__TABLE_VISITED; @@ -19,6 +22,7 @@ cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW, typedef struct { unsigned colspan, rowspan; + int cell_index; } node_cell_data; typedef struct { @@ -36,6 +40,8 @@ typedef struct { typedef struct { uint16_t n_columns; uint8_t *alignments; + int n_rows; + int n_nonempty_cells; } node_table; typedef struct { @@ -94,6 +100,33 @@ static int set_n_table_columns(cmark_node *node, uint16_t n_columns) { return 1; } +// Increment the number of rows in the table. Also update n_nonempty_cells, +// which keeps track of the number of cells which were parsed from the +// input file. (If one of the rows is too short, then the trailing cells +// are autocompleted. Autocompleted cells are not counted in n_nonempty_cells.) +// The purpose of this is to prevent a malicious input from generating a very +// large number of autocompleted cells, which could cause a denial of service +// vulnerability. +static int incr_table_row_count(cmark_node *node, int i) { + if (!node || node->type != CMARK_NODE_TABLE) { + return 0; + } + + ((node_table *)node->as.opaque)->n_rows++; + ((node_table *)node->as.opaque)->n_nonempty_cells += i; + return 1; +} + +// Calculate the number of autocompleted cells. +static int get_n_autocompleted_cells(cmark_node *node) { + if (!node || node->type != CMARK_NODE_TABLE) { + return 0; + } + + const node_table *nt = (node_table *)node->as.opaque; + return (nt->n_columns * nt->n_rows) - nt->n_nonempty_cells; +} + static uint8_t *get_table_alignments(cmark_node *node) { if (!node || node->type != CMARK_NODE_TABLE) return 0; @@ -109,6 +142,29 @@ static int set_table_alignments(cmark_node *node, uint8_t *alignments) { return 1; } +static uint8_t get_cell_alignment(cmark_node *node) { + if (!node || node->type != CMARK_NODE_TABLE_CELL) + return 0; + + node_cell_data *data = (node_cell_data *)node->as.opaque; + if (!data) + return 0; + const uint8_t *alignments = get_table_alignments(node->parent->parent); + int i = data->cell_index; + return alignments[i]; +} + +static int set_cell_index(cmark_node *node, int i) { + if (!node || node->type != CMARK_NODE_TABLE_CELL) + return 0; + + node_cell_data *data = (node_cell_data *)node->as.opaque; + if (!data) + return 0; + data->cell_index = i; + return 1; +} + static unsigned get_cell_colspan(cmark_node *node) { if (!node || node->type != CMARK_NODE_TABLE_CELL) return UINT_MAX; @@ -261,10 +317,9 @@ static table_row *row_from_string(cmark_syntax_extension *self, --cell->start_offset; ++cell->internal_offset; } + cell->cell_data = (node_cell_data *)parser->mem->calloc(1, sizeof(node_cell_data)); if (parser->options & CMARK_OPT_TABLE_SPANS) { - cell->cell_data = (node_cell_data *)parser->mem->calloc(1, sizeof(node_cell_data)); - // Check for a column-spanning cell if (row->n_columns > 0 && cmark_strbuf_len(cell->buf) == 0 && cell->start_offset == cell->end_offset) { cell->cell_data->colspan = 0; @@ -294,7 +349,8 @@ static table_row *row_from_string(cmark_syntax_extension *self, } } } else { - cell->cell_data = NULL; + cell->cell_data->colspan = 1; + cell->cell_data->rowspan = 1; } // make sure we never wrap row->n_columns @@ -366,7 +422,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, unsigned char *input, int len) { cmark_node *table_header; table_row *header_row = NULL; - table_row *marker_row = NULL; + table_row *delimiter_row = NULL; node_table_row *ntr; const char *parent_string; uint16_t i; @@ -379,16 +435,16 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, return parent_container; } - // Since scan_table_start was successful, we must have a marker row. - marker_row = row_from_string(self, parser, - input + cmark_parser_get_first_nonspace(parser), - len - cmark_parser_get_first_nonspace(parser)); + // Since scan_table_start was successful, we must have a delimiter row. + delimiter_row = row_from_string( + self, parser, input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); // assert may be optimized out, don't rely on it for security boundaries - if (!marker_row) { + if (!delimiter_row) { return parent_container; } - assert(marker_row); + assert(delimiter_row); cmark_arena_push(); @@ -398,8 +454,8 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, parent_string = cmark_node_get_string_content(parent_container); header_row = row_from_string(self, parser, (unsigned char *)parent_string, (int)strlen(parent_string)); - if (!header_row || header_row->n_columns != marker_row->n_columns) { - free_table_row(parser->mem, marker_row); + if (!header_row || header_row->n_columns != delimiter_row->n_columns) { + free_table_row(parser->mem, delimiter_row); free_table_row(parser->mem, header_row); cmark_arena_pop(); parent_container->flags |= CMARK_NODE__TABLE_VISITED; @@ -407,14 +463,14 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, } if (cmark_arena_pop()) { - marker_row = row_from_string( + delimiter_row = row_from_string( self, parser, input + cmark_parser_get_first_nonspace(parser), len - cmark_parser_get_first_nonspace(parser)); header_row = row_from_string(self, parser, (unsigned char *)parent_string, (int)strlen(parent_string)); // row_from_string can return NULL, add additional check to ensure n_columns match - if (!marker_row || !header_row || header_row->n_columns != marker_row->n_columns) { - free_table_row(parser->mem, marker_row); + if (!delimiter_row || !header_row || header_row->n_columns != delimiter_row->n_columns) { + free_table_row(parser->mem, delimiter_row); free_table_row(parser->mem, header_row); return parent_container; } @@ -422,7 +478,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, if (!cmark_node_set_type(parent_container, CMARK_NODE_TABLE)) { free_table_row(parser->mem, header_row); - free_table_row(parser->mem, marker_row); + free_table_row(parser->mem, delimiter_row); return parent_container; } @@ -435,12 +491,12 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table)); set_n_table_columns(parent_container, header_row->n_columns); - // allocate alignments based on marker_row->n_columns - // since we populate the alignments array based on marker_row->cells + // allocate alignments based on delimiter_row->n_columns + // since we populate the alignments array based on delimiter_row->cells uint8_t *alignments = - (uint8_t *)parser->mem->calloc(marker_row->n_columns, sizeof(uint8_t)); - for (i = 0; i < marker_row->n_columns; ++i) { - node_cell *node = &marker_row->cells[i]; + (uint8_t *)parser->mem->calloc(delimiter_row->n_columns, sizeof(uint8_t)); + for (i = 0; i < delimiter_row->n_columns; ++i) { + node_cell *node = &delimiter_row->cells[i]; bool left = node->buf->ptr[0] == ':', right = node->buf->ptr[node->buf->size - 1] == ':'; if (left && right) @@ -462,27 +518,28 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self, table_header->as.opaque = ntr = (node_table_row *)parser->mem->calloc(1, sizeof(node_table_row)); ntr->is_header = true; - { - for (i = 0; i < header_row->n_columns; ++i) { - node_cell *cell = &header_row->cells[i]; - cmark_node *header_cell = cmark_parser_add_child(parser, table_header, - CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset); - header_cell->start_line = header_cell->end_line = parent_container->start_line; - header_cell->internal_offset = cell->internal_offset; - header_cell->end_column = parent_container->start_column + cell->end_offset; - header_cell->as.opaque = cell->cell_data; - cell->cell_data = NULL; - cmark_node_set_string_content(header_cell, (char *) cell->buf->ptr); - cmark_node_set_syntax_extension(header_cell, self); - } + for (i = 0; i < header_row->n_columns; ++i) { + node_cell *cell = &header_row->cells[i]; + cmark_node *header_cell = cmark_parser_add_child(parser, table_header, + CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset); + header_cell->start_line = header_cell->end_line = parent_container->start_line; + header_cell->internal_offset = cell->internal_offset; + header_cell->end_column = parent_container->start_column + cell->end_offset; + header_cell->as.opaque = cell->cell_data; + cell->cell_data = NULL; + cmark_node_set_string_content(header_cell, (char *) cell->buf->ptr); + cmark_node_set_syntax_extension(header_cell, self); + set_cell_index(header_cell, i); } + incr_table_row_count(parent_container, i); + cmark_parser_advance_offset( parser, (char *)input, (int)strlen((char *)input) - 1 - cmark_parser_get_offset(parser), false); free_table_row(parser->mem, header_row); - free_table_row(parser->mem, marker_row); + free_table_row(parser->mem, delimiter_row); return parent_container; } @@ -496,6 +553,10 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self, if (cmark_parser_is_blank(parser)) return NULL; + if (get_n_autocompleted_cells(parent_container) > MAX_AUTOCOMPLETED_CELLS) { + return NULL; + } + table_row_block = cmark_parser_add_child(parser, parent_container, CMARK_NODE_TABLE_ROW, parent_container->start_column); @@ -555,12 +616,16 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self, cell->cell_data = NULL; cmark_node_set_string_content(node, (char *) cell->buf->ptr); cmark_node_set_syntax_extension(node, self); + set_cell_index(node, i); } + incr_table_row_count(parent_container, i); + for (; i < table_columns; ++i) { cmark_node *node = cmark_parser_add_child( parser, table_row_block, CMARK_NODE_TABLE_CELL, 0); cmark_node_set_syntax_extension(node, self); + set_cell_index(node, i); } } @@ -759,13 +824,7 @@ static const char *xml_attr(cmark_syntax_extension *extension, cmark_node *node) { if (node->type == CMARK_NODE_TABLE_CELL) { if (cmark_gfm_extensions_get_table_row_is_header(node->parent)) { - uint8_t *alignments = get_table_alignments(node->parent->parent); - int i = 0; - cmark_node *n; - for (n = node->parent->first_child; n; n = n->next, ++i) - if (n == node) - break; - switch (alignments[i]) { + switch (get_cell_alignment(node)) { case 'l': return " align=\"left\""; case 'c': return " align=\"center\""; case 'r': return " align=\"right\""; @@ -886,7 +945,6 @@ static void html_render(cmark_syntax_extension *extension, cmark_event_type ev_type, int options) { bool entering = (ev_type == CMARK_EVENT_ENTER); cmark_strbuf *html = renderer->html; - cmark_node *n; // XXX: we just monopolise renderer->opaque. struct html_table_state *table_state = @@ -939,7 +997,6 @@ static void html_render(cmark_syntax_extension *extension, unsigned rowspan = get_cell_rowspan(node); // A value of zero in either span means that this cell is a "filler" cell. Don't render those. if (colspan > 0 && rowspan > 0) { - uint8_t *alignments = get_table_alignments(node->parent->parent); if (entering) { cmark_html_render_cr(html); if (table_state->in_table_header) { @@ -948,12 +1005,7 @@ static void html_render(cmark_syntax_extension *extension, cmark_strbuf_puts(html, "parent->first_child; n; n = n->next, ++i) - if (n == node) - break; - - switch (alignments[i]) { + switch (get_cell_alignment(node)) { case 'l': html_table_add_align(html, "left", options); break; case 'c': html_table_add_align(html, "center", options); break; case 'r': html_table_add_align(html, "right", options); break; diff --git a/src/blocks.c b/src/blocks.c index a7290652c..f3b20580e 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -1246,15 +1246,17 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, parser->first_nonspace + 1); S_advance_offset(parser, input, input->len - 1 - parser->offset, false); } else if (!indented && - parser->options & CMARK_OPT_FOOTNOTES && + (parser->options & CMARK_OPT_FOOTNOTES) && + depth < MAX_LIST_DEPTH && (matched = scan_footnote_definition(input, parser->first_nonspace))) { cmark_chunk c = cmark_chunk_dup(input, parser->first_nonspace + 2, matched - 2); - cmark_chunk_to_cstr(parser->mem, &c); while (c.data[c.len - 1] != ']') --c.len; --c.len; + cmark_chunk_to_cstr(parser->mem, &c); + S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false); *container = add_child(parser, *container, CMARK_NODE_FOOTNOTE_DEFINITION, parser->first_nonspace + matched + 1); (*container)->as.literal = c; diff --git a/src/commonmark.c b/src/commonmark.c index d296567e0..f1698687f 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -166,7 +166,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, char fencechar[2] = {'\0', '\0'}; size_t info_len, code_len; char listmarker[LISTMARKER_SIZE]; - char *emph_delim; + const char *emph_delim; bool first_in_list_item; bufsize_t marker_width; bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) && diff --git a/src/node.c b/src/node.c index d93ed307d..49c371ff2 100644 --- a/src/node.c +++ b/src/node.c @@ -417,6 +417,7 @@ const char *cmark_node_get_literal(cmark_node *node) { case CMARK_NODE_HTML_INLINE: case CMARK_NODE_CODE: case CMARK_NODE_FOOTNOTE_REFERENCE: + case CMARK_NODE_FOOTNOTE_DEFINITION: return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.literal); case CMARK_NODE_CODE_BLOCK: diff --git a/test/extensions.txt b/test/extensions.txt index fe98fe548..19c06d5fa 100644 --- a/test/extensions.txt +++ b/test/extensions.txt @@ -208,7 +208,7 @@ fff | ggg | hhh | iii | jjj ### Table cell count mismatches -The header and marker row must match. +The header and delimiter row must match. ```````````````````````````````` example | a | b | c |