Fix markdown export

standardnotes · Dec 31, 2024 · d79da7a · d79da7a
1 parent c008278
commit d79da7a
Show file tree

Hide file tree

Showing 4 changed files with 129 additions and 32 deletions.
diff --git a/packages/web/src/javascripts/Components/SuperEditor/Lexical/Utils/MarkdownExport.ts b/packages/web/src/javascripts/Components/SuperEditor/Lexical/Utils/MarkdownExport.ts
@@ -7,12 +7,9 @@
  */
 
 /**
- * Taken from https://github.com/facebook/lexical/blob/main/packages/lexical-markdown/src/MarkdownExport.ts
- * but modified using changes from https://github.com/facebook/lexical/pull/4957 to make nested elements work
- * better when exporting to markdown.
- */
+ * Taken from https://github.com/facebook/lexical/blob/main/packages/lexical-markdown/src/MarkdownExport.ts but modified using changes from https://github.com/facebook/lexical/pull/4957 to make nested elements work better when exporting to markdown.
+ * */
 
-import type { ElementTransformer, TextFormatTransformer, TextMatchTransformer, Transformer } from '@lexical/markdown'
 import {
   ElementNode,
   LexicalNode,
@@ -24,10 +21,27 @@ import {
   $isLineBreakNode,
   $isTextNode,
 } from 'lexical'
-import { TRANSFORMERS, transformersByType } from './MarkdownImportExportUtils'
 
-export function createMarkdownExport(transformers: Array<Transformer>): (node?: ElementNode) => string {
+import {
+  ElementTransformer,
+  MultilineElementTransformer,
+  TextFormatTransformer,
+  TextMatchTransformer,
+  Transformer,
+} from '@lexical/markdown'
+
+import { isEmptyParagraph, TRANSFORMERS, transformersByType } from './MarkdownImportExportUtils'
+
+/**
+ * Renders string from markdown. The selection is moved to the start after the operation.
+ */
+function createMarkdownExport(
+  transformers: Array<Transformer>,
+  shouldPreserveNewLines: boolean = false,
+): (node?: ElementNode) => string {
   const byType = transformersByType(transformers)
+  const elementTransformers = [...byType.multilineElement, ...byType.element]
+  const isNewlineDelimited = !shouldPreserveNewLines
 
   // Export only uses text formats that are responsible for single format
   // e.g. it will filter out *** (bold, italic) and instead use separate ** and *
@@ -37,25 +51,35 @@ export function createMarkdownExport(transformers: Array<Transformer>): (node?:
     const output = []
     const children = (node || $getRoot()).getChildren()
 
-    for (const child of children) {
-      const result = exportTopLevelElements(child, byType.element, textFormatTransformers, byType.textMatch)
+    for (let i = 0; i < children.length; i++) {
+      const child = children[i]
+      const result = exportTopLevelElements(child, elementTransformers, textFormatTransformers, byType.textMatch)
 
       if (result != null) {
-        output.push(result)
+        output.push(
+          // separate consecutive group of texts with a line break: eg. ["hello", "world"] -> ["hello", "/nworld"]
+          isNewlineDelimited && i > 0 && !isEmptyParagraph(child) && !isEmptyParagraph(children[i - 1])
+            ? '\n'.concat(result)
+            : result,
+        )
       }
     }
-
-    return output.join('\n\n')
+    // Ensure consecutive groups of texts are at least \n\n apart while each empty paragraph render as a newline.
+    // Eg. ["hello", "", "", "hi", "\nworld"] -> "hello\n\n\nhi\n\nworld"
+    return output.join('\n')
   }
 }
 
 function exportTopLevelElements(
   node: LexicalNode,
-  elementTransformers: Array<ElementTransformer>,
+  elementTransformers: Array<ElementTransformer | MultilineElementTransformer>,
   textTransformersIndex: Array<TextFormatTransformer>,
   textMatchTransformers: Array<TextMatchTransformer>,
 ): string | null {
   for (const transformer of elementTransformers) {
+    if (!transformer.export) {
+      continue
+    }
     const result = transformer.export(node, (_node) =>
       exportChildren(_node, elementTransformers, textTransformersIndex, textMatchTransformers),
     )
@@ -76,23 +100,33 @@ function exportTopLevelElements(
 
 function exportChildren(
   node: ElementNode,
-  elementTransformers: Array<ElementTransformer>,
+  elementTransformers: Array<ElementTransformer | MultilineElementTransformer>,
   textTransformersIndex: Array<TextFormatTransformer>,
   textMatchTransformers: Array<TextMatchTransformer>,
 ): string {
   const output = []
   const children = node.getChildren()
+  const childrenLength = children.length
+  // keep track of unclosed tags from the very beginning
+  const unclosedTags: { format: TextFormatType; tag: string }[] = []
+
+  mainLoop: for (let childIndex = 0; childIndex < childrenLength; childIndex++) {
+    const child = children[childIndex]
+    const isLastChild = childIndex === childrenLength - 1
 
-  mainLoop: for (const child of children) {
     if ($isElementNode(child)) {
       for (const transformer of elementTransformers) {
+        if (!transformer.export) {
+          continue
+        }
+
         const result = transformer.export(child, (_node) =>
           exportChildren(_node, elementTransformers, textTransformersIndex, textMatchTransformers),
         )
 
         if (result != null) {
           output.push(result)
-          if (children.indexOf(child) !== children.length - 1) {
+          if (!isLastChild) {
             output.push('\n')
           }
           continue mainLoop
@@ -101,10 +135,14 @@ function exportChildren(
     }
 
     for (const transformer of textMatchTransformers) {
+      if (!transformer.export) {
+        continue
+      }
+
       const result = transformer.export(
         child,
         (parentNode) => exportChildren(parentNode, elementTransformers, textTransformersIndex, textMatchTransformers),
-        (textNode, textContent) => exportTextFormat(textNode, textContent, textTransformersIndex),
+        (textNode, textContent) => exportTextFormat(textNode, textContent, textTransformersIndex, unclosedTags),
       )
 
       if (result != null) {
@@ -116,9 +154,15 @@ function exportChildren(
     if ($isLineBreakNode(child)) {
       output.push('\n')
     } else if ($isTextNode(child)) {
-      output.push(exportTextFormat(child, child.getTextContent(), textTransformersIndex))
+      output.push(exportTextFormat(child, child.getTextContent(), textTransformersIndex, unclosedTags))
     } else if ($isElementNode(child)) {
-      output.push(exportChildren(child, elementTransformers, textTransformersIndex, textMatchTransformers), '\n')
+      // empty paragraph returns ""
+      output.push(exportChildren(child, elementTransformers, textTransformersIndex, textMatchTransformers))
+      // Don't insert linebreak after last child
+      if (!isLastChild) {
+        // Insert two line breaks to create a space between two paragraphs or other elements, as required by Markdown syntax.
+        output.push('\n', '\n')
+      }
     } else if ($isDecoratorNode(child)) {
       output.push(child.getTextContent())
     }
@@ -127,41 +171,66 @@ function exportChildren(
   return output.join('')
 }
 
-function exportTextFormat(node: TextNode, textContent: string, textTransformers: Array<TextFormatTransformer>): string {
+function exportTextFormat(
+  node: TextNode,
+  textContent: string,
+  textTransformers: Array<TextFormatTransformer>,
+  // unclosed tags include the markdown tags that haven't been closed yet, and their associated formats
+  unclosedTags: Array<{ format: TextFormatType; tag: string }>,
+): string {
   // This function handles the case of a string looking like this: "   foo   "
   // Where it would be invalid markdown to generate: "**   foo   **"
   // We instead want to trim the whitespace out, apply formatting, and then
   // bring the whitespace back. So our returned string looks like this: "   **foo**   "
   const frozenString = textContent.trim()
   let output = frozenString
+  // the opening tags to be added to the result
+  let openingTags = ''
+  // the closing tags to be added to the result
+  let closingTags = ''
+
+  const prevNode = getTextSibling(node, true)
+  const nextNode = getTextSibling(node, false)
 
   const applied = new Set()
 
   for (const transformer of textTransformers) {
     const format = transformer.format[0]
     const tag = transformer.tag
 
+    // dedup applied formats
     if (hasFormat(node, format) && !applied.has(format)) {
       // Multiple tags might be used for the same format (*, _)
       applied.add(format)
-      // Prevent adding opening tag is already opened by the previous sibling
-      const previousNode = getTextSibling(node, true)
 
-      if (!hasFormat(previousNode, format)) {
-        output = tag + output
+      // append the tag to openningTags, if it's not applied to the previous nodes,
+      // or the nodes before that (which would result in an unclosed tag)
+      if (!hasFormat(prevNode, format) || !unclosedTags.find((element) => element.tag === tag)) {
+        unclosedTags.push({ format, tag })
+        openingTags += tag
       }
+    }
+  }
 
-      // Prevent adding closing tag if next sibling will do it
-      const nextNode = getTextSibling(node, false)
+  // close any tags in the same order they were applied, if necessary
+  for (let i = 0; i < unclosedTags.length; i++) {
+    // prevent adding closing tag if next sibling will do it
+    if (hasFormat(nextNode, unclosedTags[i].format)) {
+      continue
+    }
 
-      if (!hasFormat(nextNode, format)) {
-        output += tag
+    while (unclosedTags.length > i) {
+      const unclosedTag = unclosedTags.pop()
+      if (unclosedTag && typeof unclosedTag.tag === 'string') {
+        closingTags += unclosedTag.tag
       }
     }
+    break
   }
 
+  output = openingTags + output + closingTags
   // Replace trimmed version of textContent ensuring surrounding whitespace is not modified
-  return textContent.replace(frozenString, output)
+  return textContent.replace(frozenString, () => output)
 }
 
 // Get next or previous text sibling a text node, including cases
@@ -208,7 +277,11 @@ function hasFormat(node: LexicalNode | null | undefined, format: TextFormatType)
   return $isTextNode(node) && node.hasFormat(format)
 }
 
-export function $convertToMarkdownString(transformers: Array<Transformer> = TRANSFORMERS, node?: ElementNode): string {
-  const exportMarkdown = createMarkdownExport(transformers)
+export function $convertToMarkdownString(
+  transformers: Array<Transformer> = TRANSFORMERS,
+  node?: ElementNode,
+  shouldPreserveNewLines: boolean = false,
+): string {
+  const exportMarkdown = createMarkdownExport(transformers, shouldPreserveNewLines)
   return exportMarkdown(node)
 }
diff --git a/...ges/web/src/javascripts/Components/SuperEditor/Lexical/Utils/MarkdownImportExportUtils.ts b/...ges/web/src/javascripts/Components/SuperEditor/Lexical/Utils/MarkdownImportExportUtils.ts
@@ -6,7 +6,10 @@ import {
   ELEMENT_TRANSFORMERS,
   TEXT_FORMAT_TRANSFORMERS,
   TEXT_MATCH_TRANSFORMERS,
+  MultilineElementTransformer,
+  MULTILINE_ELEMENT_TRANSFORMERS,
 } from '@lexical/markdown'
+import { LexicalNode, $isParagraphNode, $isTextNode } from 'lexical'
 
 function indexBy<T>(list: Array<T>, callback: (arg0: T) => string): Readonly<Record<string, Array<T>>> {
   const index: Record<string, Array<T>> = {}
@@ -26,20 +29,39 @@ function indexBy<T>(list: Array<T>, callback: (arg0: T) => string): Readonly<Rec
 
 export function transformersByType(transformers: Array<Transformer>): Readonly<{
   element: Array<ElementTransformer>
+  multilineElement: Array<MultilineElementTransformer>
   textFormat: Array<TextFormatTransformer>
   textMatch: Array<TextMatchTransformer>
 }> {
   const byType = indexBy(transformers, (t) => t.type)
 
   return {
     element: (byType.element || []) as Array<ElementTransformer>,
+    multilineElement: (byType['multiline-element'] || []) as Array<MultilineElementTransformer>,
     textFormat: (byType['text-format'] || []) as Array<TextFormatTransformer>,
     textMatch: (byType['text-match'] || []) as Array<TextMatchTransformer>,
   }
 }
 
+const MARKDOWN_EMPTY_LINE_REG_EXP = /^\s{0,3}$/
+
+export function isEmptyParagraph(node: LexicalNode): boolean {
+  if (!$isParagraphNode(node)) {
+    return false
+  }
+
+  const firstChild = node.getFirstChild()
+  return (
+    firstChild == null ||
+    (node.getChildrenSize() === 1 &&
+      $isTextNode(firstChild) &&
+      MARKDOWN_EMPTY_LINE_REG_EXP.test(firstChild.getTextContent()))
+  )
+}
+
 export const TRANSFORMERS: Array<Transformer> = [
   ...ELEMENT_TRANSFORMERS,
+  ...MULTILINE_ELEMENT_TRANSFORMERS,
   ...TEXT_FORMAT_TRANSFORMERS,
   ...TEXT_MATCH_TRANSFORMERS,
 ]
diff --git a/packages/web/src/javascripts/Components/SuperEditor/MarkdownTransformers.ts b/packages/web/src/javascripts/Components/SuperEditor/MarkdownTransformers.ts
@@ -7,6 +7,7 @@ import {
   TextMatchTransformer,
   $convertToMarkdownString,
   $convertFromMarkdownString,
+  MULTILINE_ELEMENT_TRANSFORMERS,
 } from '@lexical/markdown'
 import {
   $createTableCellNode,
@@ -247,6 +248,7 @@ export const MarkdownTransformers = [
   IMAGE,
   INLINE_FILE,
   ...ELEMENT_TRANSFORMERS,
+  ...MULTILINE_ELEMENT_TRANSFORMERS,
   ...TEXT_FORMAT_TRANSFORMERS,
   ...TEXT_MATCH_TRANSFORMERS,
   HorizontalRule,

diff --git a/...avascripts/Components/SuperEditor/Plugins/MarkdownPreviewPlugin/MarkdownPreviewPlugin.tsx b/...avascripts/Components/SuperEditor/Plugins/MarkdownPreviewPlugin/MarkdownPreviewPlugin.tsx
@@ -2,9 +2,9 @@ import { useLexicalComposerContext } from '@lexical/react/LexicalComposerContext
 import { useEffect } from 'react'
 import { $createCodeNode } from '@lexical/code'
 import { $createTextNode, $getRoot, $isParagraphNode } from 'lexical'
-import { $convertToMarkdownString } from '@lexical/markdown'
 import { MarkdownTransformers } from '../../MarkdownTransformers'
 import { $dfs } from '@lexical/utils'
+import { $convertToMarkdownString } from '../../Lexical/Utils/MarkdownExport'
 
 type Props = {
   onMarkdown: (markdown: string) => void