Skip to content

Commit

Permalink
Enforce UTF-8 encoding for input bytes (#115)
Browse files: browse the repository at this point in the history
* Detect UTF-8 for vtt/srt, error if encoded otherwise

* Switch to isValidUTF8Reader

* Remove isValidUTF8Reader function

* Enforce valid UTF8 on each SRT and WebVTT line

* Add benchmark tests for webvtt and srt

* Update benchmark test

* Add non-utf-8 tests and files

* Remove benchmarks
  • Loading branch information
nakkamarra authored Oct 8, 2024
1 parent a3932fd commit 80e6dcf
Show file tree
Hide file tree
Showing 6 changed files with 24 additions and 0 deletions.
5 changes: 5 additions & 0 deletions srt.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"strconv"
"strings"
"time"
"unicode/utf8"
)

// Constants
Expand Down Expand Up @@ -43,6 +44,10 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
// Fetch line
line = strings.TrimSpace(scanner.Text())
lineNum++
if !utf8.ValidString(line) {
err = fmt.Errorf("astisub: line %d is not valid utf-8", lineNum)
return
}

// Remove BOM header
if lineNum == 1 {
Expand Down
5 changes: 5 additions & 0 deletions srt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,8 @@ func TestSRTMissingSequence(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, string(c), w.String())
}

func TestNonUTF8SRT(t *testing.T) {
_, err := astisub.OpenFile("./testdata/example-in-non-utf8.srt")
assert.Error(t, err)
}
Binary file added testdata/example-in-non-utf8.srt
Binary file not shown.
Binary file added testdata/example-in-non-utf8.vtt
Binary file not shown.
9 changes: 9 additions & 0 deletions webvtt.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"strconv"
"strings"
"time"
"unicode/utf8"

"golang.org/x/net/html"
)
Expand Down Expand Up @@ -128,6 +129,10 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
lineNum++
line = scanner.Text()
line = strings.TrimPrefix(line, string(BytesBOM))
if !utf8.ValidString(line) {
err = fmt.Errorf("astisub: line %d is not valid utf-8", lineNum)
return
}
if fs := strings.Fields(line); len(fs) > 0 && fs[0] == "WEBVTT" {
break
}
Expand All @@ -144,6 +149,10 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
// Fetch line
line = strings.TrimSpace(scanner.Text())
lineNum++
if !utf8.ValidString(line) {
err = fmt.Errorf("astisub: line %d is not valid utf-8", lineNum)
return
}

switch {
// Comment
Expand Down
5 changes: 5 additions & 0 deletions webvtt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ func TestBroken1WebVTT(t *testing.T) {
assert.Nil(t, err)
}

// TestNonUTF8WebVTT checks that opening the non-UTF-8 WebVTT fixture
// (example-in-non-utf8.vtt) reports an error.
func TestNonUTF8WebVTT(t *testing.T) {
	// Fixture committed as a binary file alongside this test.
	const fixture = "./testdata/example-in-non-utf8.vtt"

	// The parsed subtitles are irrelevant here; only the error matters.
	_, openErr := astisub.OpenFile(fixture)
	assert.Error(t, openErr)
}

func TestWebVTTWithVoiceName(t *testing.T) {
testData := `WEBVTT
Expand Down

0 comments on commit 80e6dcf

Please sign in to comment.