Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

incremental gzip uncompress #91

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions src/zippy/gzip.nim
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,66 @@ proc uncompressGzip*(

if isize != (dst.len mod (1 shl 32)).uint32:
raise newException(ZippyError, "Size verification failed")


iterator uncompressGzipStream*(
  src: ptr UncheckedArray[uint8],
  len: int
): string =
  ## Streams the decompressed payload of a gzip buffer as a series of string
  ## chunks, exactly as produced by `inflateStream`.
  ##
  ## `src` points at the gzip data and `len` is its size in bytes. The data
  ## is assumed to contain a single file.
  ##
  ## The fixed header fields are validated and the optional file name and
  ## comment fields are skipped. The optional header CRC is skipped without
  ## being verified, and the 8 bytes at the end of the buffer are only
  ## required to be present; they are never read here.
  ##
  ## Raises `ZippyError` when the identification bytes are wrong, the
  ## compression method is not DEFLATE, reserved flag bits are set, or the
  ## FEXTRA flag is set. Truncated input is reported via `failUncompress()`.

  proc findZero(data: ptr UncheckedArray[uint8], size, first: int): int =
    # Index of the first zero byte in data[first ..< size]; fails when the
    # range holds no zero byte.
    result = first
    while result < size and data[result] != 0:
      inc result
    if result >= size:
      failUncompress()

  # Smallest accepted input: 10 header bytes plus 8 trailing bytes.
  if len < 18:
    failUncompress()

  let
    magic1 = src[0]
    magic2 = src[1]
    compression = src[2]
    flags = src[3]
    # Bytes 4 .. 9 (mtime, xfl, os) are not used.

  if not (magic1 == 31 and magic2 == 139):
    raise newException(ZippyError, "Failed gzip identification values check")

  if compression != 8: # 8 = DEFLATE
    raise newException(ZippyError, "Unsupported compression method")

  # The top three flag bits are reserved.
  if (flags and 0b1110_0000'u8) != 0'u8:
    raise newException(ZippyError, "Reserved flag bits set")

  let
    # Bit 0 (ftext) is ignored.
    hasHeaderCrc = (flags and 0x02'u8) != 0'u8
    hasExtra = (flags and 0x04'u8) != 0'u8
    hasName = (flags and 0x08'u8) != 0'u8
    hasComment = (flags and 0x10'u8) != 0'u8

  if hasExtra:
    raise newException(ZippyError, "Currently unsupported flags are set")

  # Offset of the first byte after the fixed 10 byte header.
  var cursor = 10

  # File name and comment are zero-terminated; continue right after the zero.
  if hasName:
    cursor = findZero(src, len, cursor) + 1

  if hasComment:
    cursor = findZero(src, len, cursor) + 1

  if hasHeaderCrc:
    if cursor + 2 >= len:
      failUncompress()
    # TODO: Need to implement this with a test file
    inc(cursor, 2)

  # There must still be room for the 8 trailing bytes after the payload.
  if cursor + 8 >= len:
    failUncompress()

  for chunk in inflateStream(src, len, cursor):
    yield chunk
45 changes: 45 additions & 0 deletions src/zippy/inflate.nim
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ else:
const
fastBits = 9
fastMask = (1 shl fastBits) - 1
maxLookBack = 32_768

type Huffman = object
firstCode, firstSymbol: array[16, uint16]
Expand Down Expand Up @@ -290,5 +291,49 @@ proc inflate*(dst: var string, src: ptr UncheckedArray[uint8], len, pos: int) =

dst.setLen(op)

iterator inflateStream*(src: ptr UncheckedArray[uint8], len, pos: int): string =
  ## Incrementally inflates the DEFLATE data in `src` (of size `len`),
  ## starting at byte offset `pos`, and yields the output in chunks.
  ##
  ## After each DEFLATE block the bytes newly produced by that block are
  ## yielded, so concatenating every yielded chunk in order gives the whole
  ## output with each byte appearing exactly once. A block that produces no
  ## output yields nothing.
  ##
  ## Between blocks only the most recent `maxLookBack` bytes of output are
  ## kept in the working buffer; older bytes are treated as outside the
  ## lookback range and are dropped once they have been yielded.
  ##
  ## Raises `ZippyError` when a block header has an invalid block type.
  var
    b = BitStreamReader(src: src, len: len, pos: pos)
    op: int           # write position in `dst`
    emitted: int      # number of leading bytes of `dst` already yielded
    finalBlock: bool
    dst: string

  while not finalBlock:
    let
      bfinal = b.readBits(1)
      btype = b.readBits(2)

    if bfinal != 0.uint16:
      finalBlock = true

    case btype:
    of 0: # No compression
      inflateNoCompression(dst, b, op)
    of 1: # Compressed with fixed Huffman codes
      inflateBlock(dst, b, op, true)
    of 2: # Compressed with dynamic Huffman codes
      inflateBlock(dst, b, op, false)
    else:
      raise newException(ZippyError, "Invalid block header")

    dst.setLen(op)

    # Hand out only what this block added. Yielding the whole buffer here
    # would repeat bytes that were already yielded for earlier blocks.
    if op > emitted:
      yield dst[emitted ..< op]

    # Keep just the lookback window for the next block and move `op` so it
    # still points right after the last produced byte.
    if op > maxLookBack:
      dst = dst[op - maxLookBack ..< op]
      op = maxLookBack

    # Everything currently in the buffer has been yielded.
    emitted = op


when defined(release):
{.pop.}
14 changes: 12 additions & 2 deletions tests/bench.nim
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import benchy, std/strformat, zip/zlib, zippy
import benchy, std/strformat, zip/zlib, zippy, zippy/gzip

const
zs = [
Expand All @@ -8,6 +8,7 @@ const
"randtest3.gz",
"paper-100k.pdf.gz",
"geo.protodata.gz",
"gzipfiletest_large.txt.gz",
"tor-list.gz"
]
golds = [
Expand All @@ -17,7 +18,7 @@ const
"randtest3.gold",
"paper-100k.pdf",
"geo.protodata",
"gzipfiletest.txt",
"gzipfiletest_large.txt",
"tor-list.gold"
]

Expand Down Expand Up @@ -68,3 +69,12 @@ for z in zs:
let compressed = readFile(&"tests/data/{z}")
timeIt z:
discard zlib.uncompress(compressed)

echo "https://github.com/guzba/zippy uncompress streaming"
block:
  # Load the input once, outside of timeIt, so that only decompression is
  # measured (the zlib benchmark above also reads its file before timing).
  let
    z = "gzipfiletest_large.txt.gz"
    compressed = readFile(&"tests/data/{z}")
    # Raw byte view of the file contents; `compressed` stays alive for the
    # whole block so the pointer remains valid.
    data = cast[ptr UncheckedArray[uint8]](compressed.cstring)
  timeIt z:
    # Reassemble the full output from the streamed chunks.
    var uncompressed = ""
    for blok in uncompressGzipStream(data, compressed.len):
      uncompressed.add(blok)
Loading