Skip to content

Commit

Permalink
Allow unknown html entities (mmcdole#148)
Browse files Browse the repository at this point in the history
* Allow unknown html entities

* Simplification

html.UnescapeString now deals with invalid entities
  • Loading branch information
florentsolt authored May 29, 2020
1 parent 4298e43 commit 22a67f9
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 61 deletions.
45 changes: 2 additions & 43 deletions internal/shared/parseutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@ package shared
import (
"bytes"
"errors"
"fmt"
"html"
"regexp"
"strconv"
"strings"

xpp "github.com/mmcdole/goxpp"
Expand Down Expand Up @@ -119,47 +118,7 @@ func DecodeEntities(str string) (string, error) {
buf.Write(data)
return buf.String(), nil
} else {
if data[1] == '#' {
// Numerical character reference
var str string
base := 10

if len(data) > 2 && data[2] == 'x' {
str = string(data[3:end])
base = 16
} else {
str = string(data[2:end])
}

i, err := strconv.ParseUint(str, base, 32)
if err != nil {
return "", InvalidNumericReference
}

buf.WriteRune(rune(i))
} else {
// Predefined entity
name := string(data[1:end])

var c byte
switch name {
case "lt":
c = '<'
case "gt":
c = '>'
case "quot":
c = '"'
case "apos":
c = '\''
case "amp":
c = '&'
default:
return "", fmt.Errorf("unknown predefined "+
"entity &%s;", name)
}

buf.WriteByte(c)
}
buf.WriteString(html.UnescapeString(string(data[0 : end+1])))
}

// Skip the entity
Expand Down
19 changes: 1 addition & 18 deletions internal/shared/parseutils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ func TestDecodeEntities(t *testing.T) {
{"&#34;foo&#34;", "\"foo\""},
{"&#x61;&#x062;&#x0063;", "abc"},
{"r&#xe9;sum&#x00E9;", "résumé"},
{"r&eacute;sum&eacute;", "résumé"},
{"&", "&"},
{"&foo", "&foo"},
{"&lt", "&lt"},
Expand All @@ -38,24 +39,6 @@ func TestDecodeEntities(t *testing.T) {
}
}

// TestDecodeEntitiesInvalid verifies that DecodeEntities returns a non-nil
// error for every malformed entity string listed in the table below.
func TestDecodeEntitiesInvalid(t *testing.T) {
tests := []string{
// Predefined entities
"&foo;", // unknown

// Numerical character references
"&#;", // missing number
"&#x;", // missing hexadecimal number
"&#12a;", // invalid decimal number
"&#xfoo;", // invalid hexadecimal number
}

for _, test := range tests {
res, err := DecodeEntities(test)
// The failure message reports the input and the string it was decoded to.
assert.NotNil(t, err, "%q was decoded to %q", test, res)
}
}

func TestStripCDATA(t *testing.T) {
tests := []struct {
str string
Expand Down

0 comments on commit 22a67f9

Please sign in to comment.