forked from rsc/markdown
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fuzz_test.go
110 lines (103 loc) · 2.73 KB
/
fuzz_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package markdown
import (
"bytes"
"fmt"
"net/url"
"path/filepath"
"strings"
"testing"
"unicode/utf8"
"golang.org/x/tools/txtar"
)
// FuzzGoldmark fuzzes the HTML produced by this package (Parser.parse followed
// by ToHTML) against the HTML produced by the Goldmark converter returned by
// goldmarkParser, failing on any difference that survives canonicalization.
//
// The fuzz target is skipped unless the -goldmark flag is set. The seed corpus
// is the Markdown half of every .md/.html file pair found in the txtar
// archives under testdata, excluding archives whose name ends in
// "to_markdown.txt".
func FuzzGoldmark(f *testing.F) {
	if !*goldmarkFlag {
		f.Skip("-goldmark not set")
	}

	paths, err := filepath.Glob("testdata/*.txt")
	if err != nil {
		f.Fatal(err)
	}
	for _, path := range paths {
		if strings.HasSuffix(path, "to_markdown.txt") {
			continue
		}
		archive, err := txtar.ParseFile(path)
		if err != nil {
			f.Fatal(err)
		}
		// Consume the archive front to back: a parser.json entry is dropped
		// on its own, everything else is taken as a (markdown, html) pair.
		// A single trailing entry with no partner is ignored.
		for rest := archive.Files; len(rest) >= 2; {
			if rest[0].Name == "parser.json" {
				rest = rest[1:]
				continue
			}
			src, want := rest[0], rest[1]
			rest = rest[2:]
			if strings.TrimSuffix(src.Name, ".md") != strings.TrimSuffix(want.Name, ".html") {
				f.Fatalf("mismatched file pair: %s and %s", src.Name, want.Name)
			}
			f.Add(decode(string(src.Data)))
		}
	}

	// Single-byte input rewrites applied to every fuzz input:
	//   \r -> \n  because Goldmark does not convert \r to \n.
	//   \v -> \f  because Goldmark treats \v as isUnicodeSpace when deciding
	//             emphasis; not unreasonable, but not what the spec says.
	normalize := strings.NewReplacer("\r", "\n", "\v", "\f")

	// canon erases known, accepted output differences: Goldmark writes
	// <br />, <hr />, and <img />, and escapes | as %7C. It is applied to
	// both outputs so that these sequences appearing literally in the input
	// are rewritten identically on each side.
	canon := func(h string) string {
		return strings.ReplaceAll(strings.ReplaceAll(h, " />", ">"), "%7C", "|")
	}

	f.Fuzz(func(t *testing.T, input string) {
		// Non-terminated final lines have too many corner cases; always end
		// the input with a newline.
		if !strings.HasSuffix(input, "\n") {
			input += "\n"
		}
		input = normalize.Replace(input)
		if !utf8.ValidString(input) {
			// Round-trip through runes to coerce the input to valid UTF-8.
			input = string([]rune(input))
		}

		configs := []Parser{
			{},
			{HeadingIDs: true},
			{Strikethrough: true},
			{TaskListItems: true},
			{HeadingIDs: true, Strikethrough: true, TaskListItems: true},
		}
		// Stop at the first configuration whose subtest fails.
		for idx := 0; idx < len(configs) && !t.Failed(); idx++ {
			cfg := configs[idx]
			t.Run(fmt.Sprintf("p%d", idx), func(t *testing.T) {
				doc, corner := cfg.parse(input)
				if corner {
					// parse flagged this input; skip the comparison.
					// NOTE(review): presumably a known corner case where the
					// two implementations are expected to differ - confirm
					// against Parser.parse.
					return
				}
				ours := canon(ToHTML(doc))

				var rendered bytes.Buffer
				if err := goldmarkParser(&cfg).Convert([]byte(input), &rendered); err != nil {
					t.Fatal(err)
				}
				// Make sure non-empty Goldmark output ends in a newline.
				if b := rendered.Bytes(); len(b) > 0 && b[len(b)-1] != '\n' {
					rendered.WriteByte('\n')
				}
				theirs := canon(rendered.String())

				if ours == theirs {
					return
				}
				// Percent-encode the input (spaces as %20 rather than +) for
				// the links embedded in the failure report.
				escaped := strings.ReplaceAll(url.QueryEscape(input), "+", "%20")
				t.Fatalf("in: %q\nparse:\n%s\nout: %q\ngout: %q\ndingus: (https://spec.commonmark.org/dingus/?text=%s)\ngithub: (https://github.com/rsc/tmp/issues/new?body=%s)", input, dump(doc), ours, theirs, escaped, escaped)
			})
		}
	})
}