-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
724 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,8 @@ | ||
# go-commons | ||
A kitchen sync of data structures and algorithms in Go | ||
A kitchen sink of data structures and algorithms in Go | ||
|
||
# Contents | ||
|
||
## io | ||
### LineReader | ||
Read lines from a reader while truncating lines that exceed the destination buffer's size. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
module github.com/asymmetric-research/go-commons | ||
|
||
go 1.23.0 | ||
|
||
require ( | ||
github.com/mitchellh/go-linereader v0.0.0-20190213213312-1b945b3263eb | ||
github.com/stretchr/testify v1.9.0 | ||
) | ||
|
||
require ( | ||
github.com/davecgh/go-spew v1.1.1 // indirect | ||
github.com/go-cmd/cmd v1.4.3 | ||
github.com/pmezard/go-difflib v1.0.0 // indirect | ||
golang.org/x/exp v0.0.0-20240904232852-e7e105dedf7e | ||
gopkg.in/yaml.v3 v3.0.1 // indirect | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= | ||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | ||
github.com/go-cmd/cmd v1.4.3 h1:6y3G+3UqPerXvPcXvj+5QNPHT02BUw7p6PsqRxLNA7Y= | ||
github.com/go-cmd/cmd v1.4.3/go.mod h1:u3hxg/ry+D5kwh8WvUkHLAMe2zQCaXd00t35WfQaOFk= | ||
github.com/mitchellh/go-linereader v0.0.0-20190213213312-1b945b3263eb h1:GRiLv4rgyqjqzxbhJke65IYUf4NCOOvrPOJbV/sPxkM= | ||
github.com/mitchellh/go-linereader v0.0.0-20190213213312-1b945b3263eb/go.mod h1:OaY7UOoTkkrX3wRwjpYRKafIkkyeD0UtweSHAWWiqQM= | ||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= | ||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= | ||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= | ||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= | ||
golang.org/x/exp v0.0.0-20240904232852-e7e105dedf7e h1:I88y4caeGeuDQxgdoFPUq097j7kNfw6uvuiNxUBfcBk= | ||
golang.org/x/exp v0.0.0-20240904232852-e7e105dedf7e/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ= | ||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= | ||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= | ||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= | ||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# io | ||
|
||
## LineReader | ||
### Usage | ||
```go | ||
lr := NewLineReader(reader, 4098 /* blockSize */)
line := [12288]byte{}

var err error
for err == nil {
	// Declare n and ntrunc separately and assign with "=" so that the err
	// tested by the loop condition is the one Read sets; ":=" here would
	// shadow it and the loop would never terminate.
	var n, ntrunc int
	n, ntrunc, err = lr.Read(line[:])
	lastline := line[:n]
	fmt.Printf("%q (%d bytes didn't fit)\n", lastline, ntrunc)
}
``` | ||
|
||
### Benchmarks | ||
``` | ||
go test -benchmem -benchtime=5s -bench=. ./io/... | ||
goos: linux | ||
goarch: amd64 | ||
pkg: github.com/asymmetric-research/go-commons/io | ||
cpu: AMD Ryzen 9 5950X 16-Core Processor | ||
BenchmarkLineReaderUnbuffered-32 1237466 4726 ns/op 22560 B/op 5 allocs/op | ||
BenchmarkHashicorpsUnbuffered-32 4712 1345807 ns/op 2295415 B/op 29602 allocs/op | ||
BenchmarkGoCmdUnbuffered-32 236834 24471 ns/op 41636 B/op 289 allocs/op | ||
BenchmarkLineReader-32 2206489 2722 ns/op 12328 B/op 4 allocs/op | ||
BenchmarkHashicorps-32 4239 1416668 ns/op 2285070 B/op 29601 allocs/op | ||
BenchmarkGoCmd-32 272906 21842 ns/op 31563 B/op 292 allocs/op | ||
PASS | ||
ok github.com/asymmetric-research/go-commons/io 44.384s | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
package io | ||
|
||
import ( | ||
"bytes" | ||
"io" | ||
|
||
armath "github.com/asymmetric-research/go-commons/math" | ||
) | ||
|
||
// LineReader hands out the contents of an io.Reader one line at a time,
// writing each line into a caller-supplied buffer and dropping whatever part
// of the line does not fit.
type LineReader struct {
	// reader is the underlying source of bytes.
	reader io.Reader

	// readbufbase is the scratch buffer allocated at construction time
	// (blockSize bytes long). readbuf is the window into it that holds bytes
	// already pulled from reader but not yet returned to the caller, i.e. the
	// data that followed the end of the last line.
	readbufbase, readbuf []byte

	// blocksize caps how many bytes are requested from reader in one call.
	blocksize uint
}
|
||
func NewLineReader(reader io.Reader, blockSize uint) *LineReader { | ||
lr := &LineReader{} | ||
NewlineReaderInto(lr, reader, blockSize) | ||
return lr | ||
} | ||
|
||
func NewlineReaderInto(dst *LineReader, reader io.Reader, blockSize uint) { | ||
*dst = LineReader{ | ||
reader: reader, | ||
readbufbase: make([]byte, blockSize), | ||
blocksize: blockSize, | ||
} | ||
} | ||
|
||
// Read reads as much as possible into p, until the next newline or EOF is reached. | ||
// Every new call to read starts on a new line. The remainder of the previous line will be discarted. | ||
func (lr *LineReader) Read(dst []byte) (nread int, ndiscarted int, err error) { | ||
// copy as much of read buffer as possible to dst | ||
if len(lr.readbuf) > 0 { | ||
// fast path: can we get a new line from the read buffer? | ||
maxread := armath.Min(len(dst), len(lr.readbuf)) | ||
eolidx := bytes.IndexByte(lr.readbuf[:maxread], '\n') | ||
if eolidx >= 0 && eolidx < len(dst) { | ||
// yes - copy to dst and return | ||
copy(dst[:eolidx], lr.readbuf) | ||
lr.readbuf = lr.readbuf[eolidx+1:] | ||
return eolidx, 0, nil | ||
} | ||
|
||
// no - copy as much of the read buffer as possible to dst, and then continue reading from reader | ||
n := copy(dst, lr.readbuf) | ||
nread += n | ||
lr.readbuf = lr.readbuf[n:] | ||
dst = dst[n:] | ||
|
||
} | ||
|
||
for i := uint(0); ; i++ { | ||
readOffset := lr.blocksize * i | ||
readLimit := armath.Min(readOffset+lr.blocksize, uint(len(dst))) | ||
|
||
// dst has been filled and there hasn't been a new line yet | ||
if readLimit <= readOffset { | ||
ndiscarted = lr.discardRestOfLine() | ||
return | ||
} | ||
|
||
dstClamp := dst[readOffset:readLimit] | ||
var n int | ||
n, err = lr.reader.Read(dstClamp) | ||
dstClamp = dstClamp[:n] | ||
nread += n | ||
|
||
if err == io.EOF && n == 0 { | ||
return | ||
} else if err != nil { | ||
return | ||
} | ||
|
||
// is there a end of line in this block? | ||
eolidx := bytes.IndexByte(dstClamp, '\n') | ||
|
||
if eolidx < 0 { | ||
continue | ||
} | ||
|
||
// discard the new line character | ||
nread -= 1 | ||
|
||
// is new line at the end of read? | ||
if eolidx == int(readLimit)-1 { | ||
// yes | ||
return | ||
|
||
} | ||
|
||
// copy the data after the end of line into the read buffer | ||
cpyn := copy(lr.readbufbase, dstClamp[eolidx+1:]) | ||
lr.readbuf = lr.readbufbase[:cpyn] | ||
nread -= n - eolidx - 1 | ||
return | ||
} | ||
} | ||
|
||
func (lr *LineReader) discardRestOfLine() int { | ||
// discard the rest of the line in the read buffer | ||
|
||
if len(lr.readbuf) > 0 { | ||
if idx := bytes.IndexByte(lr.readbuf, '\n'); idx >= 0 { | ||
lr.readbuf = lr.readbuf[idx+1:] | ||
return idx | ||
} else { | ||
lr.readbuf = nil | ||
} | ||
} | ||
|
||
// discard the rest of the line in the reader | ||
|
||
prevread := 0 | ||
for { | ||
n, err := lr.reader.Read(lr.readbufbase) | ||
lr.readbuf = lr.readbufbase[:n] | ||
if err != nil { | ||
return n | ||
} | ||
|
||
eolidx := bytes.IndexByte(lr.readbuf, '\n') | ||
|
||
if eolidx >= 0 { | ||
lr.readbuf = lr.readbuf[eolidx+1:] | ||
return eolidx + prevread | ||
} | ||
prevread += n | ||
} | ||
} |
Oops, something went wrong.