Skip to content

Commit

Permalink
HTML start tag closer now returns nil on Text(), Text() more often set for other textual tokens, added tests
Browse files Browse the repository at this point in the history
  • Loading branch information
tdewolff committed Nov 30, 2019
1 parent a456908 commit 4e1356a
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 5 deletions.
13 changes: 8 additions & 5 deletions html/lex.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,15 +123,13 @@ func (l *Lexer) Next() (TokenType, []byte) {
} else if c != '>' && (c != '/' || l.r.Peek(1) != '>') {
return AttributeToken, l.shiftAttribute()
}
start := l.r.Pos()
l.r.Skip()
l.inTag = false
if c == '/' {
l.r.Move(2)
l.text = l.r.Lexeme()[start:]
return StartTagVoidToken, l.r.Shift()
}
l.r.Move(1)
l.text = l.r.Lexeme()[start:]
return StartTagCloseToken, l.r.Shift()
}

Expand All @@ -151,7 +149,8 @@ func (l *Lexer) Next() (TokenType, []byte) {
if l.r.Pos() > 0 {
if isEndTag || 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '!' || c == '?' {
// return currently buffered texttoken so that we can return tag next iteration
return TextToken, l.r.Shift()
l.text = l.r.Shift()
return TextToken, l.text
}
} else if isEndTag {
l.r.Move(2)
Expand All @@ -173,7 +172,8 @@ func (l *Lexer) Next() (TokenType, []byte) {
}
} else if c == 0 && l.r.Err() != nil {
if l.r.Pos() > 0 {
return TextToken, l.r.Shift()
l.text = l.r.Shift()
return TextToken, l.text
}
return ErrorToken, nil
}
Expand Down Expand Up @@ -266,6 +266,7 @@ func (l *Lexer) readMarkup() (TokenType, []byte) {
l.r.Move(2)
for {
if l.r.Peek(0) == 0 && l.r.Err() != nil {
l.text = l.r.Lexeme()[4:]
return CommentToken, l.r.Shift()
} else if l.at('-', '-', '>') {
l.text = l.r.Lexeme()[4:]
Expand All @@ -282,8 +283,10 @@ func (l *Lexer) readMarkup() (TokenType, []byte) {
l.r.Move(7)
for {
if l.r.Peek(0) == 0 && l.r.Err() != nil {
l.text = l.r.Lexeme()[9:]
return TextToken, l.r.Shift()
} else if l.at(']', ']', '>') {
l.text = l.r.Lexeme()[9:]
l.r.Move(3)
return TextToken, l.r.Shift()
}
Expand Down
63 changes: 63 additions & 0 deletions html/lex_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,69 @@ func TestErrors(t *testing.T) {
}
}

// TestTextAndAttrVal lexes a fixed HTML snippet token by token and checks, for
// every token, the raw bytes returned by Next as well as the values reported
// by Text and AttrVal.
func TestTextAndAttrVal(t *testing.T) {
	const input = `<div attr="val" >text<!--comment--><!DOCTYPE doctype><![CDATA[cdata]]><script>js</script><svg>image</svg>`

	// Expected tokens, listed in the order the lexer is asked for them. A field
	// left unset is nil, meaning the matching accessor is expected to yield nil.
	expected := []struct {
		data    []byte
		text    []byte
		attrVal []byte
	}{
		{data: []byte("<div"), text: []byte("div")},
		{data: []byte(` attr="val"`), text: []byte("attr"), attrVal: []byte(`"val"`)},
		{data: []byte(">")},
		{data: []byte("text"), text: []byte("text")},
		{data: []byte("<!--comment-->"), text: []byte("comment")},
		{data: []byte("<!DOCTYPE doctype>"), text: []byte(" doctype")},
		{data: []byte("<![CDATA[cdata]]>"), text: []byte("cdata")},
		{data: []byte("<script"), text: []byte("script")},
		{data: []byte(">")},
		{data: []byte("js")},
		{data: []byte("</script>"), text: []byte("script")},
		{data: []byte("<svg>image</svg>"), text: []byte("svg")},
	}

	l := NewLexer(bytes.NewBufferString(input))
	for _, want := range expected {
		// The token type is not under test here; only the byte views are.
		_, data := l.Next()
		test.Bytes(t, data, want.data)
		test.Bytes(t, l.Text(), want.text)
		test.Bytes(t, l.AttrVal(), want.attrVal)
	}
}

func TestOffset(t *testing.T) {
l := NewLexer(bytes.NewBufferString(`<div attr="val">text</div>`))
test.T(t, l.Offset(), 0)
Expand Down

0 comments on commit 4e1356a

Please sign in to comment.