inline.go

// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package markdown

import (
	"bytes"
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

/*
text node can be

 - other literal text
 - run of * or _ characters
 - [
 - ![

keep delimiter stack pointing at non-other literal text
each node contains

 - type of delimiter [ ![ _ *
 - number of delimiters
 - active or not
 - potential opener, potential closer, or obth

when a ] is hit, call look for link or image
when end is hit, call process emphasis

look for link or image:

	find topmost [ or ![
	if none, emit literal ]
	if its inactive, remove and emit literal ]
	parse ahead to look for rest of link; if none, remove and emit literal ]
	run process emphasis on the interior,
	remove opener
	if this was a link (not an image), set all [ before opener to inactive, to avoid links inside links

process emphasis

	walk forward in list to find a closer.
	walk back to find first potential matching opener.
	if found:
		strong for length >= 2
		insert node
		drop delimiters between opener and closer
		remove 1 or 2 from open/close count, removing if now empty
		if closing has some left, go around again on this node
	if not:
		set openers bottom for this kind of element to before current_position
		if the closer at current pos is not an opener, remove it

seems needlessly complex. two passes

scan and find ` ` first.

pass 1. scan and find [ and ]() and leave the rest alone.

each completed one invokes emphasis on inner text and then on the overall list.

*/

type Inline interface {
	PrintHTML(*bytes.Buffer)
	PrintText(*bytes.Buffer)
	printMarkdown(*bytes.Buffer)
}

type Plain struct {
	Text string
}

func (*Plain) Inline() {}

func (x *Plain) PrintHTML(buf *bytes.Buffer) {
	htmlEscaper.WriteString(buf, x.Text)
}

func (x *Plain) printMarkdown(buf *bytes.Buffer) {
	buf.WriteString(x.Text)
}

func (x *Plain) PrintText(buf *bytes.Buffer) {
	htmlEscaper.WriteString(buf, x.Text)
}

type openPlain struct {
	Plain
	i int // position in input where bracket is
}

type emphPlain struct {
	Plain
	canOpen  bool
	canClose bool
	i        int // position in output where emph is
	n        int // length of original span
}

type Escaped struct {
	Plain
}

func (x *Escaped) printMarkdown(buf *bytes.Buffer) {
	buf.WriteByte('\\')
	x.Plain.printMarkdown(buf)
}

type Code struct {
	Text string
}

func (*Code) Inline() {}

func (x *Code) PrintHTML(buf *bytes.Buffer) {
	fmt.Fprintf(buf, "<code>%s</code>", htmlEscaper.Replace(x.Text))
}

func (x *Code) printMarkdown(buf *bytes.Buffer) {
	if len(x.Text) == 0 {
		return
	}
	// Use the fewest backticks we can, and add spaces as needed.
	ticks := strings.Repeat("`", longestSequence(x.Text, '`')+1)
	buf.WriteString(ticks)
	if x.Text[0] == '`' {
		buf.WriteByte(' ')
	}
	buf.WriteString(x.Text)
	if x.Text[len(x.Text)-1] == '`' {
		buf.WriteByte(' ')
	}
	buf.WriteString(ticks)
}

// longestSequence returns the length of the longest sequence of consecutive bytes b in s.
func longestSequence(s string, b byte) int {
	max := 0
	cur := 0
	for i := range s {
		if s[i] == b {
			cur++
		} else {
			if cur > max {
				max = cur
			}
			cur = 0
		}
	}
	if cur > max {
		max = cur
	}
	return max
}

func (x *Code) PrintText(buf *bytes.Buffer) {
	htmlEscaper.WriteString(buf, x.Text)
}

type Strong struct {
	Marker string
	Inner  []Inline
}

func (x *Strong) Inline() {
}

func (x *Strong) PrintHTML(buf *bytes.Buffer) {
	buf.WriteString("<strong>")
	for _, c := range x.Inner {
		c.PrintHTML(buf)
	}
	buf.WriteString("</strong>")
}

func (x *Strong) printMarkdown(buf *bytes.Buffer) {
	buf.WriteString(x.Marker)
	for _, c := range x.Inner {
		c.printMarkdown(buf)
	}
	buf.WriteString(x.Marker)
}

func (x *Strong) PrintText(buf *bytes.Buffer) {
	for _, c := range x.Inner {
		c.PrintText(buf)
	}
}

type Del struct {
	Marker string
	Inner  []Inline
}

func (x *Del) Inline() {

}

func (x *Del) PrintHTML(buf *bytes.Buffer) {
	buf.WriteString("<del>")
	for _, c := range x.Inner {
		c.PrintHTML(buf)
	}
	buf.WriteString("</del>")
}

func (x *Del) printMarkdown(buf *bytes.Buffer) {
	buf.WriteString(x.Marker)
	for _, c := range x.Inner {
		c.printMarkdown(buf)
	}
	buf.WriteString(x.Marker)
}

func (x *Del) PrintText(buf *bytes.Buffer) {
	for _, c := range x.Inner {
		c.PrintText(buf)
	}
}

type Emph struct {
	Marker string
	Inner  []Inline
}

func (*Emph) Inline() {}

func (x *Emph) PrintHTML(buf *bytes.Buffer) {
	buf.WriteString("<em>")
	for _, c := range x.Inner {
		c.PrintHTML(buf)
	}
	buf.WriteString("</em>")
}

func (x *Emph) printMarkdown(buf *bytes.Buffer) {
	buf.WriteString(x.Marker)
	for _, c := range x.Inner {
		c.printMarkdown(buf)
	}
	buf.WriteString(x.Marker)
}

func (x *Emph) PrintText(buf *bytes.Buffer) {
	for _, c := range x.Inner {
		c.PrintText(buf)
	}
}

func (p *parseState) emit(i int) {
	if p.emitted < i {
		p.list = append(p.list, &Plain{p.s[p.emitted:i]})
		p.emitted = i
	}
}

func (p *parseState) skip(i int) {
	p.emitted = i
}

func (p *parseState) inline(s string) []Inline {
	s = trimSpaceTab(s)
	// Scan text looking for inlines.
	// Leaf inlines are converted immediately.
	// Non-leaf inlines have potential starts pushed on a stack while we await completion.
	// Links take priority over other emphasis, so the emphasis must be delayed.
	p.s = s
	p.list = nil
	p.emitted = 0
	var opens []int // indexes of open ![ and [ Plains in p.list
	var lastLinkOpen int
	backticks := false
	i := 0
	for i < len(s) {
		var parser func(*parseState, string, int) (Inline, int, int, bool)
		switch s[i] {
		case '\\':
			parser = parseEscape
		case '`':
			if !backticks {
				backticks = true
				p.backticks.reset()
			}
			parser = p.backticks.parseCodeSpan
		case '<':
			parser = parseAutoLinkOrHTML
		case '[':
			parser = parseLinkOpen
		case '!':
			parser = parseImageOpen
		case '_', '*':
			parser = parseEmph
		case '.':
			if p.SmartDot {
				parser = parseDot
			}
		case '-':
			if p.SmartDash {
				parser = parseDash
			}
		case '"', '\'':
			if p.SmartQuote {
				parser = parseEmph
			}
		case '~':
			if p.Strikethrough {
				parser = parseEmph
			}
		case '\n': // TODO what about eof
			parser = parseBreak
		case '&':
			parser = parseHTMLEntity
		case ':':
			if p.Emoji {
				parser = parseEmoji
			}
		}
		if parser != nil {
			if x, start, end, ok := parser(p, s, i); ok {
				p.emit(start)
				if _, ok := x.(*openPlain); ok {
					opens = append(opens, len(p.list))
				}
				p.list = append(p.list, x)
				i = end
				p.skip(i)
				continue
			}
		}
		if s[i] == ']' && len(opens) > 0 {
			oi := opens[len(opens)-1]
			open := p.list[oi].(*openPlain)
			opens = opens[:len(opens)-1]
			if open.Text[0] == '!' || lastLinkOpen <= open.i {
				if x, end, ok := p.parseLinkClose(s, i, open); ok {
					p.corner = p.corner || x.corner || linkCorner(x.URL)
					p.emit(i)
					x.Inner = p.emph(nil, p.list[oi+1:])
					if open.Text[0] == '!' {
						p.list[oi] = (*Image)(x)
					} else {
						p.list[oi] = x
					}
					p.list = p.list[:oi+1]
					p.skip(end)
					i = end
					if open.Text[0] == '[' {
						// No links around links.
						lastLinkOpen = open.i
					}
					continue
				}
			}
		}
		i++
	}
	p.emit(len(s))
	p.list = p.emph(p.list[:0], p.list)
	p.list = p.mergePlain(p.list)
	p.list = p.autoLinkText(p.list)

	return p.list
}

func (ps *parseState) emph(dst, src []Inline) []Inline {
	const chars = "_*~\"'"
	var stack [len(chars)][]*emphPlain
	stackOf := func(c byte) int {
		return strings.IndexByte(chars, c)
	}

	trimStack := func() {
		for i := range stack {
			stk := &stack[i]
			for len(*stk) > 0 && (*stk)[len(*stk)-1].i >= len(dst) {
				*stk = (*stk)[:len(*stk)-1]
			}
		}
	}

Src:
	for i := 0; i < len(src); i++ {
		if open, ok := src[i].(*openPlain); ok {
			// Convert unused link/image open marker to plain text.
			dst = append(dst, &open.Plain)
			continue
		}
		p, ok := src[i].(*emphPlain)
		if !ok {
			dst = append(dst, src[i])
			continue
		}
		if p.canClose {
			stk := &stack[stackOf(p.Text[0])]
		Loop:
			for p.Text != "" {
				// Looking for same symbol and compatible with p.Text.
				for i := len(*stk) - 1; i >= 0; i-- {
					start := (*stk)[i]
					if (p.Text[0] == '*' || p.Text[0] == '_') && (p.canOpen && p.canClose || start.canOpen && start.canClose) && (p.n+start.n)%3 == 0 && (p.n%3 != 0 || start.n%3 != 0) {
						continue
					}
					if p.Text[0] == '~' && len(p.Text) != len(start.Text) { // ~ matches ~, ~~ matches ~~
						continue
					}
					if p.Text[0] == '"' {
						dst[start.i].(*emphPlain).Text = "“"
						p.Text = "”"
						dst = append(dst, p)
						*stk = (*stk)[:i]
						// no trimStack
						continue Src
					}
					if p.Text[0] == '\'' {
						dst[start.i].(*emphPlain).Text = "‘"
						p.Text = "’"
						dst = append(dst, p)
						*stk = (*stk)[:i]
						// no trimStack
						continue Src
					}
					var d int
					if len(p.Text) >= 2 && len(start.Text) >= 2 {
						// strong
						d = 2
					} else {
						// emph
						d = 1
					}
					del := p.Text[0] == '~'
					x := &Emph{Marker: p.Text[:d], Inner: append([]Inline(nil), dst[start.i+1:]...)}
					start.Text = start.Text[:len(start.Text)-d]
					p.Text = p.Text[d:]
					if start.Text == "" {
						dst = dst[:start.i]
					} else {
						dst = dst[:start.i+1]
					}
					trimStack()
					if del {
						dst = append(dst, (*Del)(x))
					} else if d == 2 {
						dst = append(dst, (*Strong)(x))
					} else {
						dst = append(dst, x)
					}
					continue Loop
				}
				break
			}
		}
		if p.Text != "" {
			stk := &stack[stackOf(p.Text[0])]
			if p.Text == "'" {
				p.Text = "’"
			}
			if p.Text == "\"" {
				if p.canClose {
					p.Text = "”"
				} else {
					p.Text = "“"
				}
			}
			if p.canOpen {
				p.i = len(dst)
				dst = append(dst, p)
				*stk = append(*stk, p)
			} else {
				dst = append(dst, &p.Plain)
			}
		}
	}
	return dst
}

func mdUnescape(s string) string {
	if !strings.Contains(s, `\`) && !strings.Contains(s, `&`) {
		return s
	}
	return mdUnescaper.Replace(s)
}

var mdUnescaper = func() *strings.Replacer {
	var list = []string{
		`\!`, `!`,
		`\"`, `"`,
		`\#`, `#`,
		`\$`, `$`,
		`\%`, `%`,
		`\&`, `&`,
		`\'`, `'`,
		`\(`, `(`,
		`\)`, `)`,
		`\*`, `*`,
		`\+`, `+`,
		`\,`, `,`,
		`\-`, `-`,
		`\.`, `.`,
		`\/`, `/`,
		`\:`, `:`,
		`\;`, `;`,
		`\<`, `<`,
		`\=`, `=`,
		`\>`, `>`,
		`\?`, `?`,
		`\@`, `@`,
		`\[`, `[`,
		`\\`, `\`,
		`\]`, `]`,
		`\^`, `^`,
		`\_`, `_`,
		"\\`", "`",
		`\{`, `{`,
		`\|`, `|`,
		`\}`, `}`,
		`\~`, `~`,
	}

	for name, repl := range htmlEntity {
		list = append(list, name, repl)
	}
	return strings.NewReplacer(list...)
}()

func isPunct(c byte) bool {
	return '!' <= c && c <= '/' || ':' <= c && c <= '@' || '[' <= c && c <= '`' || '{' <= c && c <= '~'
}

func parseEscape(p *parseState, s string, i int) (Inline, int, int, bool) {
	if i+1 < len(s) {
		c := s[i+1]
		if isPunct(c) {
			return &Escaped{Plain{s[i+1 : i+2]}}, i, i + 2, true
		}
		if c == '\n' { // TODO what about eof
			if i > 0 && s[i-1] == '\\' {
				p.corner = true // goldmark mishandles \\\ newline
			}
			end := i + 2
			for end < len(s) && (s[end] == ' ' || s[end] == '\t') {
				end++
			}
			return &HardBreak{}, i, end, true
		}
	}
	return nil, 0, 0, false
}

func parseDot(p *parseState, s string, i int) (Inline, int, int, bool) {
	if i+2 < len(s) && s[i+1] == '.' && s[i+2] == '.' {
		return &Plain{"…"}, i, i + 3, true
	}
	return nil, 0, 0, false
}

func parseDash(p *parseState, s string, i int) (Inline, int, int, bool) {
	if i+1 >= len(s) || s[i+1] != '-' {
		return nil, 0, 0, false
	}

	n := 2
	for i+n < len(s) && s[i+n] == '-' {
		n++
	}

	// Mimic cmark-gfm. Can't make this stuff up.
	em, en := 0, 0
	switch {
	case n%3 == 0:
		em = n / 3
	case n%2 == 0:
		en = n / 2
	case n%3 == 2:
		em = (n - 2) / 3
		en = 1
	case n%3 == 1:
		em = (n - 4) / 3
		en = 2
	}
	return &Plain{strings.Repeat("—", em) + strings.Repeat("–", en)}, i, i + n, true
}

// Inline code span markers must fit on punched cards, to match cmark-gfm.
const maxBackticks = 80

type backtickParser struct {
	last    [maxBackticks]int
	scanned bool
}

func (b *backtickParser) reset() {
	*b = backtickParser{}
}

func (b *backtickParser) parseCodeSpan(p *parseState, s string, i int) (Inline, int, int, bool) {
	start := i
	// Count leading backticks. Need to find that many again.
	n := 1
	for i+n < len(s) && s[i+n] == '`' {
		n++
	}

	// If we've already scanned the whole string (for a different count),
	// we can skip a failed scan by checking whether we saw this count.
	// To enable this optimization, following cmark-gfm, we declare by fiat
	// that more than maxBackticks backquotes is too many.
	if n > len(b.last) || b.scanned && b.last[n-1] < i+n {
		goto NoMatch
	}

	for end := i + n; end < len(s); {
		if s[end] != '`' {
			end++
			continue
		}
		estart := end
		for end < len(s) && s[end] == '`' {
			end++
		}
		m := end - estart
		if !b.scanned && m < len(b.last) {
			b.last[m-1] = estart
		}
		if m == n {
			// Match.
			// Line endings are converted to single spaces.
			text := s[i+n : estart]
			text = strings.ReplaceAll(text, "\n", " ")

			// If enclosed text starts and ends with a space and is not all spaces,
			// one space is removed from start and end, to allow `` ` `` to quote a single backquote.
			if len(text) >= 2 && text[0] == ' ' && text[len(text)-1] == ' ' && trimSpace(text) != "" {
				text = text[1 : len(text)-1]
			}

			return &Code{text}, start, end, true
		}
	}
	b.scanned = true

NoMatch:
	// No match, so none of these backticks count: skip them all.
	// For example ``x` is not a single backtick followed by a code span.
	// Returning nil, 0, false would advance to the second backtick and try again.
	return &Plain{s[i : i+n]}, start, i + n, true
}

func parseAutoLinkOrHTML(p *parseState, s string, i int) (Inline, int, int, bool) {
	if x, end, ok := parseAutoLinkURI(s, i); ok {
		return x, i, end, true
	}
	if x, end, ok := parseAutoLinkEmail(s, i); ok {
		return x, i, end, true
	}
	if x, end, ok := parseHTMLTag(p, s, i); ok {
		return x, i, end, true
	}
	return nil, 0, 0, false
}

func isLetter(c byte) bool {
	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
}

func isLDH(c byte) bool {
	return isLetterDigit(c) || c == '-'
}

func isLetterDigit(c byte) bool {
	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9'
}

func parseLinkOpen(_ *parseState, s string, i int) (Inline, int, int, bool) {
	return &openPlain{Plain{s[i : i+1]}, i + 1}, i, i + 1, true
}

func parseImageOpen(_ *parseState, s string, i int) (Inline, int, int, bool) {
	if i+1 < len(s) && s[i+1] == '[' {
		return &openPlain{Plain{s[i : i+2]}, i + 2}, i, i + 2, true
	}
	return nil, 0, 0, false
}

func parseEmph(p *parseState, s string, i int) (Inline, int, int, bool) {
	c := s[i]
	j := i + 1
	if c == '*' || c == '~' || c == '_' {
		for j < len(s) && s[j] == c {
			j++
		}
	}
	if c == '~' && j-i != 2 {
		// Goldmark does not accept ~text~
		// and incorrectly accepts ~~~text~~~.
		// Only ~~ is correct.
		p.corner = true
	}
	if c == '~' && j-i > 2 {
		return &Plain{s[i:j]}, i, j, true
	}

	var before, after rune
	if i == 0 {
		before = ' '
	} else {
		before, _ = utf8.DecodeLastRuneInString(s[:i])
	}
	if j >= len(s) {
		after = ' '
	} else {
		after, _ = utf8.DecodeRuneInString(s[j:])
	}

	// “A left-flanking delimiter run is a delimiter run that is
	// (1) not followed by Unicode whitespace, and either
	// (2a) not followed by a Unicode punctuation character, or
	// (2b) followed by a Unicode punctuation character
	// and preceded by Unicode whitespace or a Unicode punctuation character.
	// For purposes of this definition, the beginning and the end
	// of the line count as Unicode whitespace.”
	leftFlank := !isUnicodeSpace(after) &&
		(!isUnicodePunct(after) || isUnicodeSpace(before) || isUnicodePunct(before))

	// “A right-flanking delimiter run is a delimiter run that is
	// (1) not preceded by Unicode whitespace, and either
	// (2a) not preceded by a Unicode punctuation character, or
	// (2b) preceded by a Unicode punctuation character
	// and followed by Unicode whitespace or a Unicode punctuation character.
	// For purposes of this definition, the beginning and the end
	// of the line count as Unicode whitespace.”
	rightFlank := !isUnicodeSpace(before) &&
		(!isUnicodePunct(before) || isUnicodeSpace(after) || isUnicodePunct(after))

	var canOpen, canClose bool

	switch c {
	case '\'', '"':
		canOpen = leftFlank && !rightFlank && before != ']' && before != ')'
		canClose = rightFlank
	case '*', '~':
		// “A single * character can open emphasis iff
		// it is part of a left-flanking delimiter run.”

		// “A double ** can open strong emphasis iff
		// it is part of a left-flanking delimiter run.”
		canOpen = leftFlank

		// “A single * character can close emphasis iff
		// it is part of a right-flanking delimiter run.”

		// “A double ** can close strong emphasis iff
		// it is part of a right-flanking delimiter run.”
		canClose = rightFlank
	case '_':
		// “A single _ character can open emphasis iff
		// it is part of a left-flanking delimiter run and either
		// (a) not part of a right-flanking delimiter run or
		// (b) part of a right-flanking delimiter run preceded by a Unicode punctuation character.”

		// “A double __ can open strong emphasis iff
		// it is part of a left-flanking delimiter run and either
		// (a) not part of a right-flanking delimiter run or
		// (b) part of a right-flanking delimiter run preceded by a Unicode punctuation character.”
		canOpen = leftFlank && (!rightFlank || isUnicodePunct(before))

		// “A single _ character can close emphasis iff
		// it is part of a right-flanking delimiter run and either
		// (a) not part of a left-flanking delimiter run or
		// (b) part of a left-flanking delimiter run followed by a Unicode punctuation character.”

		// “A double __ can close strong emphasis iff
		// it is part of a right-flanking delimiter run and either
		// (a) not part of a left-flanking delimiter run or
		// (b) part of a left-flanking delimiter run followed by a Unicode punctuation character.”
		canClose = rightFlank && (!leftFlank || isUnicodePunct(after))
	}

	return &emphPlain{Plain: Plain{s[i:j]}, canOpen: canOpen, canClose: canClose, n: j - i}, i, j, true
}

func isUnicodeSpace(r rune) bool {
	if r < 0x80 {
		return r == ' ' || r == '\t' || r == '\f' || r == '\n'
	}
	return unicode.In(r, unicode.Zs)
}

func isUnicodePunct(r rune) bool {
	if r < 0x80 {
		return isPunct(byte(r))
	}
	return unicode.In(r, unicode.Punct)
}

func (p *parseState) parseLinkClose(s string, i int, open *openPlain) (*Link, int, bool) {
	if i+1 < len(s) {
		switch s[i+1] {
		case '(':
			// Inline link - [Text](Dest Title), with Title omitted or both Dest and Title omitted.
			i := skipSpace(s, i+2)
			var dest, title string
			var titleChar byte
			var corner bool
			if i < len(s) && s[i] != ')' {
				var ok bool
				dest, i, ok = parseLinkDest(s, i)
				if !ok {
					break
				}
				i = skipSpace(s, i)
				if i < len(s) && s[i] != ')' {
					title, titleChar, i, ok = parseLinkTitle(s, i)
					if title == "" {
						corner = true
					}
					if !ok {
						break
					}
					i = skipSpace(s, i)
				}
			}
			if i < len(s) && s[i] == ')' {
				return &Link{URL: dest, Title: title, TitleChar: titleChar, corner: corner}, i + 1, true
			}
			// NOTE: Test malformed ( ) with shortcut reference
			// TODO fall back on syntax error?

		case '[':
			// Full reference link - [Text][Label]
			label, i, ok := parseLinkLabel(p, s, i+1)
			if !ok {
				break
			}
			if link, ok := p.links[normalizeLabel(label)]; ok {
				return &Link{URL: link.URL, Title: link.Title, corner: link.corner}, i, true
			}
			// Note: Could break here, but CommonMark dingus does not
			// fall back to trying Text for [Text][Label] when Label is unknown.
			// Unclear from spec what the correct answer is.
			return nil, 0, false
		}
	}

	// Collapsed or shortcut reference link: [Text][] or [Text].
	end := i + 1
	if strings.HasPrefix(s[end:], "[]") {
		end += 2
	}

	if link, ok := p.links[normalizeLabel(s[open.i:i])]; ok {
		return &Link{URL: link.URL, Title: link.Title, corner: link.corner}, end, true
	}
	return nil, 0, false
}

func skipSpace(s string, i int) int {
	// Note: Blank lines have already been removed.
	for i < len(s) && (s[i] == ' ' || s[i] == '\t' || s[i] == '\n') {
		i++
	}
	return i
}

func linkCorner(url string) bool {
	for i := 0; i < len(url); i++ {
		if url[i] == '%' {
			if i+2 >= len(url) || !isHexDigit(url[i+1]) || !isHexDigit(url[i+2]) {
				// Goldmark and the Dingus re-escape such percents as %25,
				// but the spec does not seem to require this behavior.
				return true
			}
		}
	}
	return false
}

func (p *parseState) mergePlain(list []Inline) []Inline {
	out := list[:0]
	start := 0
	for i := 0; ; i++ {
		if i < len(list) && toPlain(list[i]) != nil {
			continue
		}
		// Non-Plain or end of list.
		if start < i {
			out = append(out, mergePlain1(list[start:i]))
		}
		if i >= len(list) {
			break
		}
		out = append(out, list[i])
		start = i + 1
	}
	return out
}

func toPlain(x Inline) *Plain {
	// TODO what about Escaped?
	switch x := x.(type) {
	case *Plain:
		return x
	case *emphPlain:
		return &x.Plain
	case *openPlain:
		return &x.Plain
	}
	return nil
}

func mergePlain1(list []Inline) *Plain {
	if len(list) == 1 {
		return toPlain(list[0])
	}
	var all []string
	for _, pl := range list {
		all = append(all, toPlain(pl).Text)
	}
	return &Plain{Text: strings.Join(all, "")}
}

func parseEmoji(p *parseState, s string, i int) (Inline, int, int, bool) {
	for j := i + 1; ; j++ {
		if j >= len(s) || j-i > 2+maxEmojiLen {
			break
		}
		if s[j] == ':' {
			name := s[i+1 : j]
			if utf, ok := emoji[name]; ok {
				return &Emoji{s[i : j+1], utf}, i, j + 1, true
			}
			break
		}
	}
	return nil, 0, 0, false
}

type Emoji struct {
	Name string // emoji :name:, including colons
	Text string // Unicode for emoji sequence
}

func (*Emoji) Inline() {}

func (x *Emoji) PrintHTML(buf *bytes.Buffer) {
	htmlEscaper.WriteString(buf, x.Text)
}

func (x *Emoji) printMarkdown(buf *bytes.Buffer) {
	buf.WriteString(x.Text)
}

func (x *Emoji) PrintText(buf *bytes.Buffer) {
	htmlEscaper.WriteString(buf, x.Text)
}