Skip to content

Commit

Permalink
Add ReplaceEntities and remove entity replacement from EscapeAttrVal;…
Browse files Browse the repository at this point in the history
… add entity replacements for HTML and XML that map to single bytes
  • Loading branch information
tdewolff committed Nov 20, 2019
1 parent 24e08b0 commit c6bbc7c
Show file tree
Hide file tree
Showing 7 changed files with 306 additions and 173 deletions.
84 changes: 50 additions & 34 deletions common.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,43 +189,59 @@ func DataURI(dataURI []byte) ([]byte, []byte, error) {
return nil, nil, ErrBadDataURI
}

// QuoteEntity parses the given byte slice and returns the quote that got matched (' or ") and its entity length.
func QuoteEntity(b []byte) (quote byte, n int) {
if len(b) < 5 || b[0] != '&' {
return 0, 0
}
if b[1] == '#' {
if b[2] == 'x' {
i := 3
for i < len(b) && b[i] == '0' {
i++
}
if i+2 < len(b) && b[i] == '2' && b[i+2] == ';' {
if b[i+1] == '2' {
return '"', i + 3 // &#x22;
} else if b[i+1] == '7' {
return '\'', i + 3 // &#x27;
// ReplaceEntities replaces all occurrences of entities (such as &quot;) with their respective unencoded bytes.
func ReplaceEntities(b []byte, entities map[string]byte) []byte {
for i := 0; i < len(b); i++ {
if b[i] == '&' && i+3 < len(b) {
var r byte
var n int
if b[i+1] == '#' {
if b[i+2] == 'x' {
c := 0
j := i + 3
for ; j < len(b) && c < 256 && (b[j] >= '0' && b[j] <= '9' || b[j] >= 'a' && b[j] <= 'f' || b[j] >= 'A' && b[j] <= 'F'); j++ {
if b[j] <= '9' {
c = c<<4 + int(b[j]-'0')
} else if b[j] <= 'F' {
c = c<<4 + int(b[j]-'A') + 10
} else if b[j] <= 'f' {
c = c<<4 + int(b[j]-'a') + 10
}
}
if i+3 < j && j < len(b) && c < 256 && b[j] == ';' {
r = byte(c)
n = j + 1 - i
}
} else {
c := 0
j := i + 2
for ; j < len(b) && c < 256 && b[j] >= '0' && b[j] <= '9'; j++ {
c = c*10 + int(b[j]-'0')
}
if i+2 < j && j < len(b) && c < 256 && b[j] == ';' {
r = byte(c)
n = j + 1 - i
}
}
}
} else {
i := 2
for i < len(b) && b[i] == '0' {
i++
}
if i+2 < len(b) && b[i] == '3' && b[i+2] == ';' {
if b[i+1] == '4' {
return '"', i + 3 // &#34;
} else if b[i+1] == '9' {
return '\'', i + 3 // &#39;
} else {
j := i + 1
for ; j < len(b) && b[j] != ';'; j++ {
}
if i+1 < j && j < len(b) {
var ok bool
r, ok = entities[string(b[i+1:j])]
if ok {
n = j + 1 - i
}
}
}
}
} else if len(b) >= 6 && b[5] == ';' {
if EqualFold(b[1:5], []byte{'q', 'u', 'o', 't'}) {
return '"', 6 // &quot;
} else if EqualFold(b[1:5], []byte{'a', 'p', 'o', 's'}) {
return '\'', 6 // &apos;

if 0 < n {
b[i] = byte(r)
copy(b[i+1:], b[i+n:])
b = b[:len(b)-n+1]
}
}
}
return 0, 0
return b
}
44 changes: 25 additions & 19 deletions common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,26 +103,32 @@ func TestParseDataURI(t *testing.T) {
}
}

func TestParseQuoteEntity(t *testing.T) {
var quoteEntityTests = []struct {
quoteEntity string
expectedQuote byte
expectedN int
func TestReplaceEntities(t *testing.T) {
entityMap := map[string]byte{
"quot": '"',
"apos": '\'',
}
var entityTests = []struct {
entity string
expected string
}{
{"&#34;", '"', 5},
{"&#039;", '\'', 6},
{"&#x0022;", '"', 8},
{"&#x27;", '\'', 6},
{"&quot;", '"', 6},
{"&apos;", '\'', 6},
{"&gt;", 0x00, 0},
{"&amp;", 0x00, 0},
}
for _, tt := range quoteEntityTests {
t.Run(tt.quoteEntity, func(t *testing.T) {
quote, n := QuoteEntity([]byte(tt.quoteEntity))
test.T(t, quote, tt.expectedQuote, "quote")
test.T(t, n, tt.expectedN, "quote length")
{"&#34;", `"`},
{"&#039;", `'`},
{"&#x0022;", `"`},
{"&#x27;", `'`},
{"&quot;", `"`},
{"&apos;", `'`},
{"&#9191;", `&#9191;`},
{"&#x23e7;", `&#x23e7;`},
{"&apos;&quot;", `'"`},
{"&#34", `&#34`},
{"&#x22", `&#x22`},
{"&apos", `&apos`},
}
for _, tt := range entityTests {
t.Run(tt.entity, func(t *testing.T) {
b := ReplaceEntities([]byte(tt.entity), entityMap)
test.T(t, string(b), tt.expected)
})
}
}
Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
github.com/tdewolff/test v1.0.0 h1:jOwzqCXr5ePXEPGJaq2ivoR6HOCi+D5TPfpoyg8yvmU=
github.com/tdewolff/test v1.0.0/go.mod h1:DiQUlutnqlEvdvhSn2LPGy4TFwRauAaYDsL+683RNX4=
github.com/tdewolff/test v1.0.4 h1:ih38SXuQJ32Hng5EtSW32xqEsVeMnPp6nNNRPhBBDE8=
github.com/tdewolff/test v1.0.4/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE=
Loading

0 comments on commit c6bbc7c

Please sign in to comment.