Skip to content

Commit

Permalink
introduce LineReader
Browse files Browse the repository at this point in the history
  • Loading branch information
marctrem committed Sep 6, 2024
1 parent 652f1e3 commit 9a766a4
Show file tree
Hide file tree
Showing 8 changed files with 724 additions and 1 deletion.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,8 @@
# go-commons
A kitchen sync of data structures and algorithms in Go
A kitchen sink of data structures and algorithms in Go

# Contents

## io
### LineReader
Read lines from a reader while truncating lines that exceed the destination buffer's size.
16 changes: 16 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
module github.com/asymmetric-research/go-commons

go 1.23.0

require (
github.com/mitchellh/go-linereader v0.0.0-20190213213312-1b945b3263eb
github.com/stretchr/testify v1.9.0
)

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-cmd/cmd v1.4.3
github.com/pmezard/go-difflib v1.0.0 // indirect
golang.org/x/exp v0.0.0-20240904232852-e7e105dedf7e
gopkg.in/yaml.v3 v3.0.1 // indirect
)
16 changes: 16 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-cmd/cmd v1.4.3 h1:6y3G+3UqPerXvPcXvj+5QNPHT02BUw7p6PsqRxLNA7Y=
github.com/go-cmd/cmd v1.4.3/go.mod h1:u3hxg/ry+D5kwh8WvUkHLAMe2zQCaXd00t35WfQaOFk=
github.com/mitchellh/go-linereader v0.0.0-20190213213312-1b945b3263eb h1:GRiLv4rgyqjqzxbhJke65IYUf4NCOOvrPOJbV/sPxkM=
github.com/mitchellh/go-linereader v0.0.0-20190213213312-1b945b3263eb/go.mod h1:OaY7UOoTkkrX3wRwjpYRKafIkkyeD0UtweSHAWWiqQM=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
golang.org/x/exp v0.0.0-20240904232852-e7e105dedf7e h1:I88y4caeGeuDQxgdoFPUq097j7kNfw6uvuiNxUBfcBk=
golang.org/x/exp v0.0.0-20240904232852-e7e105dedf7e/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
Binary file added io.test
Binary file not shown.
32 changes: 32 additions & 0 deletions io/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# io

## LineReader
### Usage
```go
lr := NewLineReader(reader, 4098 /* blockSize */)
line := [12288]byte{}

for {
	n, ntrunc, err := lr.Read(line[:])
	lastline := line[:n]
	fmt.Printf("%q: %d bytes didn't fit\n", lastline, ntrunc)
	if err != nil {
		break
	}
}
```

### Benchmarks
```
go test -benchmem -benchtime=5s -bench=. ./io/...
goos: linux
goarch: amd64
pkg: github.com/asymmetric-research/go-commons/io
cpu: AMD Ryzen 9 5950X 16-Core Processor
BenchmarkLineReaderUnbuffered-32 1237466 4726 ns/op 22560 B/op 5 allocs/op
BenchmarkHashicorpsUnbuffered-32 4712 1345807 ns/op 2295415 B/op 29602 allocs/op
BenchmarkGoCmdUnbuffered-32 236834 24471 ns/op 41636 B/op 289 allocs/op
BenchmarkLineReader-32 2206489 2722 ns/op 12328 B/op 4 allocs/op
BenchmarkHashicorps-32 4239 1416668 ns/op 2285070 B/op 29601 allocs/op
BenchmarkGoCmd-32 272906 21842 ns/op 31563 B/op 292 allocs/op
PASS
ok github.com/asymmetric-research/go-commons/io 44.384s
```
131 changes: 131 additions & 0 deletions io/line_reader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package io

import (
"bytes"
"io"

armath "github.com/asymmetric-research/go-commons/math"
)

// LineReader reads newline-delimited lines from an io.Reader into
// caller-supplied buffers, keeping any bytes read past the end of a line for
// the next call.
type LineReader struct {
	// reader is the underlying source of bytes.
	reader io.Reader
	// readbufbase is the scratch buffer, allocated once with blocksize bytes.
	readbufbase []byte
	// readbuf is the window into readbufbase holding bytes that were read from
	// reader but not yet handed to the caller.
	readbuf []byte
	// blocksize is the block size the reader was constructed with; Read uses
	// it to bound each read from reader.
	blocksize uint
}

// NewLineReader allocates a LineReader that pulls lines from reader and
// initializes it with NewlineReaderInto using the given blockSize.
func NewLineReader(reader io.Reader, blockSize uint) *LineReader {
	created := new(LineReader)
	NewlineReaderInto(created, reader, blockSize)
	return created
}

// NewlineReaderInto (re)initializes dst in place so that it reads lines from
// reader. Any previous state held by dst, including buffered bytes, is dropped.
//
// blockSize is the size of the internal scratch buffer and the block size
// recorded on dst. A blockSize of 0 would produce an empty scratch buffer and
// zero-length reads that never make progress, so it is replaced by a default.
func NewlineReaderInto(dst *LineReader, reader io.Reader, blockSize uint) {
	const defaultBlockSize = 4096

	if blockSize == 0 {
		blockSize = defaultBlockSize
	}

	*dst = LineReader{
		reader:      reader,
		readbufbase: make([]byte, blockSize),
		blocksize:   blockSize,
	}
}

// Read copies the next line into dst, without its trailing '\n'. Every call
// starts on a new line: when a line does not fit in dst, dst is filled and the
// remainder of that line is discarded.
//
// It returns the number of bytes written to dst (nread), the number of bytes of
// the line that were dropped because dst was too small (ndiscarted, the newline
// itself is not counted), and any error reported by the underlying reader.
// Bytes that arrive together with an error (for example a final line without a
// trailing newline, followed by io.EOF) are still delivered, so callers should
// consume dst[:nread] before inspecting err.
func (lr *LineReader) Read(dst []byte) (nread int, ndiscarted int, err error) {
	// Serve from the bytes left over by a previous call first.
	if len(lr.readbuf) > 0 {
		// fast path: is there a complete line in the read buffer that fits in dst?
		maxread := armath.Min(len(dst), len(lr.readbuf))
		if eolidx := bytes.IndexByte(lr.readbuf[:maxread], '\n'); eolidx >= 0 {
			copy(dst, lr.readbuf[:eolidx])
			lr.readbuf = lr.readbuf[eolidx+1:]
			return eolidx, 0, nil
		}

		// no - move as much of the read buffer as fits into dst and keep
		// going with the underlying reader.
		n := copy(dst, lr.readbuf)
		nread = n
		lr.readbuf = lr.readbuf[n:]
		dst = dst[n:]
	}

	for {
		// dst has been filled and there hasn't been a newline yet.
		if len(dst) == 0 {
			ndiscarted = lr.discardRestOfLine()
			return nread, ndiscarted, nil
		}

		// Read at most one block so that anything found after a newline is
		// guaranteed to fit in the scratch buffer.
		limit := armath.Min(lr.blocksize, uint(len(dst)))
		chunk := dst[:limit]

		var n int
		n, err = lr.reader.Read(chunk)
		chunk = chunk[:n]

		// Look at the data before the error: a reader may return both.
		if eolidx := bytes.IndexByte(chunk, '\n'); eolidx >= 0 {
			nread += eolidx

			// Keep whatever follows the newline for the next call.
			kept := copy(lr.readbufbase, chunk[eolidx+1:])
			lr.readbuf = lr.readbufbase[:kept]

			// The line is complete. An io.EOF seen here is not reported yet:
			// buffered lines may remain, and a reader at end of input reports
			// io.EOF again on the next read.
			if err == io.EOF {
				err = nil
			}
			return nread, 0, err
		}

		// No newline yet. Advance by the number of bytes actually read so that
		// a short read does not leave a gap in dst.
		nread += n
		dst = dst[n:]

		if err != nil {
			return nread, 0, err
		}
	}
}

// discardRestOfLine drops everything up to and including the next '\n', first
// from the read buffer and then from the underlying reader. Bytes found after
// that newline are kept in the read buffer for the next line.
//
// It returns the number of bytes dropped, not counting the newline. If the
// reader reports an error before a newline is found, the bytes seen so far are
// counted and the read buffer is left empty. The error itself cannot be
// returned through this signature; the reader is expected to report it again
// on the next read.
func (lr *LineReader) discardRestOfLine() int {
	discarded := 0

	// discard the rest of the line in the read buffer
	if len(lr.readbuf) > 0 {
		if idx := bytes.IndexByte(lr.readbuf, '\n'); idx >= 0 {
			lr.readbuf = lr.readbuf[idx+1:]
			return idx
		}
		// No newline buffered: all of it belongs to the line being dropped.
		discarded = len(lr.readbuf)
		lr.readbuf = nil
	}

	// discard the rest of the line in the reader
	for {
		n, err := lr.reader.Read(lr.readbufbase)
		chunk := lr.readbufbase[:n]

		// Handle the data before the error: a reader may return both.
		if eolidx := bytes.IndexByte(chunk, '\n'); eolidx >= 0 {
			lr.readbuf = chunk[eolidx+1:]
			return discarded + eolidx
		}

		discarded += n
		if err != nil {
			// Everything read so far was part of the dropped line; do not
			// leave it behind to be served as the next line.
			lr.readbuf = nil
			return discarded
		}
	}
}
Loading

0 comments on commit 9a766a4

Please sign in to comment.