diff --git a/Sources/Markdown/Base/Document.swift b/Sources/Markdown/Base/Document.swift index 0a48118f..8bd233c0 100644 --- a/Sources/Markdown/Base/Document.swift +++ b/Sources/Markdown/Base/Document.swift @@ -38,31 +38,49 @@ public extension Document { /// Parse a string into a `Document`. /// /// - parameter string: the input Markdown text to parse. - /// - parameter options: options for parsing Markdown text. - /// - parameter source: an explicit source URL from which the input `string` came for marking source locations. + /// - parameter options: options for parsing Markdown text, including + /// Commonmark-specific options and extensions. /// This need not be a file URL. - init(parsing string: String, source: URL? = nil, options: ParseOptions = []) { - if options.contains(.parseBlockDirectives) { + init(parsing string: String, source: URL? = nil, convertOptions options: ConvertOptions) { + if options.parseOptions.contains(.parseBlockDirectives) { self = BlockDirectiveParser.parse(string, source: source, options: options) } else { self = MarkupParser.parseString(string, source: source, options: options) } } + + /// Parse a string into a `Document`. + /// + /// - parameter string: the input Markdown text to parse. + /// - parameter options: options for parsing Markdown text. + /// - parameter source: an explicit source URL from which the input `string` came for marking source locations. + /// This need not be a file URL. + init(parsing string: String, source: URL? = nil, options: ParseOptions = []) { + self.init(parsing: string, source: source, convertOptions: .init(fromParseOptions: options)) + } /// Parse a file's contents into a `Document`. /// - /// - parameter file: a file URL from which to load Markdown text to parse. - /// - parameter options: options for parsing Markdown text. 
- init(parsing file: URL, options: ParseOptions = []) throws { + /// - parameter options: options for parsing Markdown text, including + /// Commonmark-specific options and extensions. + init(parsing file: URL, convertOptions options: ConvertOptions) throws { let string = try String(contentsOf: file) - if options.contains(.parseBlockDirectives) { + if options.parseOptions.contains(.parseBlockDirectives) { self = BlockDirectiveParser.parse(string, source: file, options: options) } else { self = MarkupParser.parseString(string, source: file, options: options) } } + + /// Parse a file's contents into a `Document`. + /// + /// - parameter file: a file URL from which to load Markdown text to parse. + /// - parameter options: options for parsing Markdown text. + init(parsing file: URL, options: ParseOptions = []) throws { + try self.init(parsing: file, convertOptions: .init(fromParseOptions: options)) + } /// Create a document from a sequence of block markup elements. init(_ children: Children) where Children.Element == BlockMarkup { diff --git a/Sources/Markdown/Markdown.docc/Markdown/FormatterAndOptions.md b/Sources/Markdown/Markdown.docc/Markdown/FormatterAndOptions.md index fa34e474..7e10728a 100644 --- a/Sources/Markdown/Markdown.docc/Markdown/FormatterAndOptions.md +++ b/Sources/Markdown/Markdown.docc/Markdown/FormatterAndOptions.md @@ -9,5 +9,7 @@ ### Options - ``MarkupDumpOptions`` +- ``ParseOptions`` +- ``ConvertOptions`` diff --git a/Sources/Markdown/Parser/BlockDirectiveParser.swift b/Sources/Markdown/Parser/BlockDirectiveParser.swift index 3dd9100e..aa82ac58 100644 --- a/Sources/Markdown/Parser/BlockDirectiveParser.swift +++ b/Sources/Markdown/Parser/BlockDirectiveParser.swift @@ -491,7 +491,7 @@ private enum ParseContainer: CustomStringConvertible { /// A Doxygen command, which can contain arbitrary markup (but not block directives). 
case doxygenCommand(PendingDoxygenCommand, [TrimmedLine]) - init(parsingHierarchyFrom trimmedLines: TrimmedLines, options: ParseOptions) where TrimmedLines.Element == TrimmedLine { + init(parsingHierarchyFrom trimmedLines: TrimmedLines, options: ConvertOptions) where TrimmedLines.Element == TrimmedLine { self = ParseContainerStack(parsingHierarchyFrom: trimmedLines, options: options).top } @@ -663,7 +663,7 @@ private enum ParseContainer: CustomStringConvertible { /// Convert this container to the corresponding ``RawMarkup`` node. func convertToRawMarkup(ranges: inout RangeTracker, parent: ParseContainer?, - options: ParseOptions) -> [RawMarkup] { + options: ConvertOptions) -> [RawMarkup] { switch self { case let .root(children): let rawChildren = children.flatMap { @@ -749,9 +749,9 @@ struct ParseContainerStack { /// The stack of containers to be incrementally folded into a hierarchy. private var stack: [ParseContainer] - private let options: ParseOptions + private let options: ConvertOptions - init(parsingHierarchyFrom trimmedLines: TrimmedLines, options: ParseOptions) where TrimmedLines.Element == TrimmedLine { + init(parsingHierarchyFrom trimmedLines: TrimmedLines, options: ConvertOptions) where TrimmedLines.Element == TrimmedLine { self.stack = [.root([])] self.options = options for line in trimmedLines { @@ -772,7 +772,7 @@ struct ParseContainerStack { } private var canParseDoxygenCommand: Bool { - guard options.contains(.parseMinimalDoxygen) else { return false } + guard options.parseOptions.contains(.parseMinimalDoxygen) else { return false } guard !isInBlockDirective else { return false } @@ -1105,7 +1105,7 @@ extension Document { /// /// - Precondition: The `rootContainer` must be the `.root` case. 
fileprivate init(converting rootContainer: ParseContainer, from source: URL?, - options: ParseOptions) { + options: ConvertOptions) { guard case .root = rootContainer else { fatalError("Tried to convert a non-root container to a `Document`") } @@ -1128,14 +1128,14 @@ extension Document { } struct BlockDirectiveParser { - static func parse(_ input: URL, options: ParseOptions = []) throws -> Document { + static func parse(_ input: URL, options: ConvertOptions = .init()) throws -> Document { let string = try String(contentsOf: input, encoding: .utf8) return parse(string, source: input, options: options) } /// Parse the input. static func parse(_ input: String, source: URL?, - options: ParseOptions = []) -> Document { + options: ConvertOptions = .init()) -> Document { // Phase 0: Split the input into lines lazily, keeping track of // line numbers, consecutive blank lines, and start positions on each line where indentation ends. // These trim points may be used to adjust the indentation seen by the CommonMark parser when diff --git a/Sources/Markdown/Parser/CommonMarkConverter.swift b/Sources/Markdown/Parser/CommonMarkConverter.swift index 510bbfda..244de737 100644 --- a/Sources/Markdown/Parser/CommonMarkConverter.swift +++ b/Sources/Markdown/Parser/CommonMarkConverter.swift @@ -608,25 +608,18 @@ struct MarkupParser { return MarkupConversion(state: childConversion.state.next(), result: .inlineAttributes(attributes: attributes, parsedRange: parsedRange, childConversion.result)) } - static func parseString(_ string: String, source: URL?, options: ParseOptions) -> Document { + static func parseString(_ string: String, source: URL?, options: ConvertOptions) -> Document { cmark_gfm_core_extensions_ensure_registered() - - var cmarkOptions = CMARK_OPT_TABLE_SPANS - if !options.contains(.disableSmartOpts) { - cmarkOptions |= CMARK_OPT_SMART - } - if !options.contains(.disableSourcePosOpts) { - cmarkOptions |= CMARK_OPT_SOURCEPOS - } - let parser = cmark_parser_new(cmarkOptions) 
+ let parser = cmark_parser_new(options.commonmarkOptions.rawValue) + + for ext in options.commonmarkExtensions { + cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension(ext)) + } - cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("table")) - cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("strikethrough")) - cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("tasklist")) cmark_parser_feed(parser, string, string.utf8.count) let rawDocument = cmark_parser_finish(parser) - let initialState = MarkupConverterState(source: source, iterator: cmark_iter_new(rawDocument), event: CMARK_EVENT_NONE, node: nil, options: options, headerSeen: false, pendingTableBody: nil).next() + let initialState = MarkupConverterState(source: source, iterator: cmark_iter_new(rawDocument), event: CMARK_EVENT_NONE, node: nil, options: options.parseOptions, headerSeen: false, pendingTableBody: nil).next() precondition(initialState.event == CMARK_EVENT_ENTER) precondition(initialState.nodeType == .document) let conversion = convertAnyElement(initialState) diff --git a/Sources/Markdown/Parser/ConvertOptions.swift b/Sources/Markdown/Parser/ConvertOptions.swift new file mode 100644 index 00000000..c9f5c94a --- /dev/null +++ b/Sources/Markdown/Parser/ConvertOptions.swift @@ -0,0 +1,132 @@ +/* + This source file is part of the Swift.org open source project + + Copyright (c) 2021 Apple Inc. and the Swift project authors + Licensed under Apache License v2.0 with Runtime Library Exception + + See https://swift.org/LICENSE.txt for license information + See https://swift.org/CONTRIBUTORS.txt for Swift project authors +*/ + +import cmark_gfm + +/// Options to use when converting Markdown. 
+public struct ConvertOptions {
+    /// The swift-markdown parse options, such as `.parseBlockDirectives`.
+    public let parseOptions: ParseOptions
+
+    /// The Commonmark options; their raw value is what the cmark parser is created with.
+    public let commonmarkOptions: CommonmarkOptions
+
+    /// The names of the Commonmark syntax extensions to attach to the parser.
+    public let commonmarkExtensions: [String]
+
+    /// Create convert options from explicitly chosen parts.
+    ///
+    /// Every value is stored exactly as given. In particular, `.disableSmartOpts`
+    /// and `.disableSourcePosOpts` in `parseOptions` do not change
+    /// `commonmarkOptions` here; use ``init(fromParseOptions:)`` for that.
+    ///
+    /// - parameter parseOptions: options for parsing Markdown text.
+    /// - parameter commonmarkOptions: options given to the Commonmark converter.
+    /// - parameter extensions: names of the Commonmark extensions to enable.
+    public init(parseOptions: ParseOptions, commonmarkOptions: CommonmarkOptions, extensions: [String]) {
+        self.parseOptions = parseOptions
+        self.commonmarkOptions = commonmarkOptions
+        self.commonmarkExtensions = extensions
+    }
+
+    /// Create convert options that mirror a set of `ParseOptions`.
+    ///
+    /// Starts from ``defaultCommonmarkOptions`` and ``defaultCommonmarkExtensions``,
+    /// then removes `.smart` when `options` contains `.disableSmartOpts` and
+    /// `.sourcepos` when it contains `.disableSourcePosOpts`.
+    ///
+    /// - parameter options: options for parsing Markdown text.
+    public init(fromParseOptions options: ParseOptions) {
+        var commonmarkOptions = ConvertOptions.defaultCommonmarkOptions
+        if options.contains(.disableSmartOpts) {
+            commonmarkOptions.remove(.smart)
+        }
+        if options.contains(.disableSourcePosOpts) {
+            commonmarkOptions.remove(.sourcepos)
+        }
+        self.init(
+            parseOptions: options,
+            commonmarkOptions: commonmarkOptions,
+            extensions: ConvertOptions.defaultCommonmarkExtensions
+        )
+    }
+
+    /// Create convert options from ``defaultParseOptions``.
+    public init() {
+        self.init(fromParseOptions: ConvertOptions.defaultParseOptions)
+    }
+
+    /// The parse options used by ``init()``: none.
+    public static let defaultParseOptions: ParseOptions = []
+
+    /// The Commonmark options that ``init(fromParseOptions:)`` starts from.
+    public static let defaultCommonmarkOptions: CommonmarkOptions = [
+        .smart,
+        .tableSpans,
+        .sourcepos
+    ]
+
+    /// The Commonmark extension names that ``init(fromParseOptions:)`` enables.
+    public static let defaultCommonmarkExtensions: [String] = [
+        "table",
+        "strikethrough",
+        "tasklist",
+    ]
+}
+
+/// Options given to the Commonmark converter.
+public struct CommonmarkOptions: OptionSet {
+    public var rawValue: Int32
+
+    public init(rawValue: Int32) {
+        self.rawValue = rawValue
+    }
+
+    /// The default Commonmark behavior, no special options.
+    public static let `default` = CommonmarkOptions(rawValue: CMARK_OPT_DEFAULT)
+
+    /// Include a `data-sourcepos` attribute on all block elements.
+    public static let sourcepos = CommonmarkOptions(rawValue: CMARK_OPT_SOURCEPOS)
+
+    /// Render `softbreak` elements as hard line breaks.
+    public static let hardBreaks = CommonmarkOptions(rawValue: CMARK_OPT_HARDBREAKS)
+
+    /// Render raw HTML and unsafe links.
+    ///
+    /// Unsafe links are `javascript:`, `vbscript:`, `file:`, and
+    /// `data:`, except for `image/png`, `image/gif`, `image/jpeg`
+    /// or `image/webp` MIME types. Without this option, raw HTML
+    /// is replaced by a placeholder HTML comment. Unsafe links
+    /// are replaced by empty strings.
+    public static let unsafe = CommonmarkOptions(rawValue: CMARK_OPT_UNSAFE)
+
+    /// Render `softbreak` elements as spaces.
+    public static let noBreaks = CommonmarkOptions(rawValue: CMARK_OPT_NOBREAKS)
+
+    /// Validate UTF-8 in the input before parsing, replacing illegal
+    /// sequences with the replacement character `U+FFFD`.
+    public static let validateUtf8 = CommonmarkOptions(rawValue: CMARK_OPT_VALIDATE_UTF8)
+
+    /// Convert straight quotes to curly, `---` to em dashes, `--` to en dashes.
+    public static let smart = CommonmarkOptions(rawValue: CMARK_OPT_SMART)
+
+    /// Use GitHub-style `<pre lang="x">` tags for code blocks instead of
+    /// `<pre><code class="language-x">`.
+    public static let githubPreLang = CommonmarkOptions(rawValue: CMARK_OPT_GITHUB_PRE_LANG)
+
+    /// Be liberal in interpreting inline HTML tags.
+    public static let liberalHtmlTag = CommonmarkOptions(rawValue: CMARK_OPT_LIBERAL_HTML_TAG)
+
+    /// Parse footnotes.
+    public static let footnotes = CommonmarkOptions(rawValue: CMARK_OPT_FOOTNOTES)
+
+    /// Only parse strikethroughs if surrounded by exactly 2 tildes.
+    ///
+    /// Strikethroughs are still only parsed when the `"strikethrough"`
+    /// extension is enabled.
+    public static let strikethroughDoubleTilde = CommonmarkOptions(rawValue: CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE)
+
+    /// Use style attributes to align table cells instead of align attributes.
+    public static let tablePreferStyleAttributes = CommonmarkOptions(rawValue: CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES)
+
+    /// Include the remainder of the info string in code blocks in
+    /// a separate attribute.
+    public static let fullInfoString = CommonmarkOptions(rawValue: CMARK_OPT_FULL_INFO_STRING)
+
+    /// Parse only inline markdown directives. Block directives will not be
+    /// parsed (their literal representations will remain in the output).
+    public static let inlineOnly = CommonmarkOptions(rawValue: CMARK_OPT_INLINE_ONLY)
+
+    /// Parse the markdown input without removing preceding/trailing whitespace and
+    /// without converting newline characters to breaks.
+    ///
+    /// Using this option also enables the `CMARK_OPT_INLINE_ONLY` option.
+    // FIXME: the original `CMARK_OPT_PRESERVE_WHITESPACE` isn't available to the swift compiler?
+    public static let preserveWhitespace = CommonmarkOptions(rawValue: (1 << 19) | CMARK_OPT_INLINE_ONLY)
+
+    /// Enable the row- and column-span syntax in the tables extension.
+    public static let tableSpans = CommonmarkOptions(rawValue: CMARK_OPT_TABLE_SPANS)
+
+    /// Use a "ditto mark" (`"`) instead of a caret (`^`) to indicate row-spans in the tables extension.
+    public static let tableRowspanDitto = CommonmarkOptions(rawValue: CMARK_OPT_TABLE_ROWSPAN_DITTO)
+}
diff --git a/Tests/MarkdownTests/Parsing/CommonMarkConverterTests.swift b/Tests/MarkdownTests/Parsing/CommonMarkConverterTests.swift
index 94c52dd4..17337c70 100644
--- a/Tests/MarkdownTests/Parsing/CommonMarkConverterTests.swift
+++ b/Tests/MarkdownTests/Parsing/CommonMarkConverterTests.swift
@@ -33,4 +33,31 @@ class CommonMarkConverterTests: XCTestCase {
         let document = Document(parsing: text, source: nil, options: [.parseBlockDirectives, .parseSymbolLinks])
         XCTAssertEqual(expectedDump, document.debugDescription(options: .printSourceLocations))
     }
+    
+    /// Checks that an explicitly supplied set of Commonmark options is honored by the converter.
+    func testCustomOpts() {
+        let markdown = "~This is not strikethrough~ -- but ~~this is strikethrough~~."
+
+        // Only `.strikethroughDoubleTilde` is enabled. With "smart" left out, the
+        // `--` must not become an en-dash, and the single-tilde span stays plain text.
+        let customOptions = ConvertOptions(
+            parseOptions: ConvertOptions.defaultParseOptions,
+            commonmarkOptions: .strikethroughDoubleTilde,
+            extensions: ConvertOptions.defaultCommonmarkExtensions
+        )
+        let document = Document(parsing: markdown, source: nil, convertOptions: customOptions)
+        let actualDump = document.debugDescription(options: .printSourceLocations)
+
+        // The dump the parser is expected to produce, including source ranges.
+        let expectedDump = """
+           Document @1:1-1:62
+           └─ Paragraph @1:1-1:62
+              ├─ Text @1:1-1:36 "~This is not strikethrough~ -- but "
+              ├─ Strikethrough @1:36-1:61
+              │  └─ Text @1:38-1:59 "this is strikethrough"
+              └─ Text @1:61-1:62 "."
+           """
+
+        XCTAssertEqual(expectedDump, actualDump)
+    }
 }