From 9bccfa5da1011c5c38507317afe86b8df99afc0b Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Sun, 28 Jul 2019 11:51:59 +0200 Subject: [PATCH 1/4] Remove the last decoder allocation ``` benchmark old ns/op new ns/op delta BenchmarkDecoder_DecodeAll/kppkn.gtb.zst-12 608499 610005 +0.25% BenchmarkDecoder_DecodeAll/geo.protodata.zst-12 136501 137699 +0.88% BenchmarkDecoder_DecodeAll/plrabn12.txt.zst-12 1967989 1944000 -1.22% BenchmarkDecoder_DecodeAll/lcet10.txt.zst-12 1448010 1444000 -0.28% BenchmarkDecoder_DecodeAll/asyoulik.txt.zst-12 504333 504333 +0.00% BenchmarkDecoder_DecodeAll/alice29.txt.zst-12 650494 652504 +0.31% BenchmarkDecoder_DecodeAll/html_x_4.zst-12 267000 267199 +0.07% BenchmarkDecoder_DecodeAll/paper-100k.pdf.zst-12 24840 24659 -0.73% BenchmarkDecoder_DecodeAll/fireworks.jpeg.zst-12 9594 9639 +0.47% BenchmarkDecoder_DecodeAll/urls.10K.zst-12 1662010 1670000 +0.48% BenchmarkDecoder_DecodeAll/html.zst-12 156400 155601 -0.51% benchmark old MB/s new MB/s speedup BenchmarkDecoder_DecodeAll/kppkn.gtb.zst-12 302.91 302.16 1.00x BenchmarkDecoder_DecodeAll/geo.protodata.zst-12 868.77 861.21 0.99x BenchmarkDecoder_DecodeAll/plrabn12.txt.zst-12 244.85 247.87 1.01x BenchmarkDecoder_DecodeAll/lcet10.txt.zst-12 294.72 295.54 1.00x BenchmarkDecoder_DecodeAll/asyoulik.txt.zst-12 248.21 248.21 1.00x BenchmarkDecoder_DecodeAll/alice29.txt.zst-12 233.81 233.09 1.00x BenchmarkDecoder_DecodeAll/html_x_4.zst-12 1534.08 1532.93 1.00x BenchmarkDecoder_DecodeAll/paper-100k.pdf.zst-12 4122.35 4152.47 1.01x BenchmarkDecoder_DecodeAll/fireworks.jpeg.zst-12 12828.87 12769.05 1.00x BenchmarkDecoder_DecodeAll/urls.10K.zst-12 422.43 420.41 1.00x BenchmarkDecoder_DecodeAll/html.zst-12 654.73 658.09 1.01x benchmark old allocs new allocs delta BenchmarkDecoder_DecodeAll/kppkn.gtb.zst-12 1 0 -100.00% BenchmarkDecoder_DecodeAll/geo.protodata.zst-12 1 0 -100.00% BenchmarkDecoder_DecodeAll/plrabn12.txt.zst-12 1 0 -100.00% BenchmarkDecoder_DecodeAll/lcet10.txt.zst-12 1 0 -100.00% 
BenchmarkDecoder_DecodeAll/asyoulik.txt.zst-12 1 0 -100.00% BenchmarkDecoder_DecodeAll/alice29.txt.zst-12 1 0 -100.00% BenchmarkDecoder_DecodeAll/html_x_4.zst-12 1 0 -100.00% BenchmarkDecoder_DecodeAll/paper-100k.pdf.zst-12 1 0 -100.00% BenchmarkDecoder_DecodeAll/fireworks.jpeg.zst-12 1 0 -100.00% BenchmarkDecoder_DecodeAll/urls.10K.zst-12 1 0 -100.00% BenchmarkDecoder_DecodeAll/html.zst-12 1 0 -100.00% benchmark old bytes new bytes delta BenchmarkDecoder_DecodeAll/kppkn.gtb.zst-12 32 0 -100.00% BenchmarkDecoder_DecodeAll/geo.protodata.zst-12 32 0 -100.00% BenchmarkDecoder_DecodeAll/plrabn12.txt.zst-12 32 0 -100.00% BenchmarkDecoder_DecodeAll/lcet10.txt.zst-12 32 0 -100.00% BenchmarkDecoder_DecodeAll/asyoulik.txt.zst-12 32 0 -100.00% BenchmarkDecoder_DecodeAll/alice29.txt.zst-12 32 0 -100.00% BenchmarkDecoder_DecodeAll/html_x_4.zst-12 32 0 -100.00% BenchmarkDecoder_DecodeAll/paper-100k.pdf.zst-12 32 0 -100.00% BenchmarkDecoder_DecodeAll/fireworks.jpeg.zst-12 32 0 -100.00% BenchmarkDecoder_DecodeAll/urls.10K.zst-12 32 0 -100.00% BenchmarkDecoder_DecodeAll/html.zst-12 32 0 -100.00% ``` --- zstd/README.md | 6 ++++-- zstd/decoder.go | 12 +++++++++--- zstd/framedec.go | 3 +++ 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/zstd/README.md b/zstd/README.md index 670f98af44..d9d38b23f1 100644 --- a/zstd/README.md +++ b/zstd/README.md @@ -34,7 +34,8 @@ For now, a high speed (fastest) and medium-fast (default) compressor has been im The "Fastest" compression ratio is roughly equivalent to zstd level 1. The "Default" compression ration is roughly equivalent to zstd level 3 (default). -In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode. The compression ratio compared to stdlib is around level 3, but usually 3x as fast. +In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode. +The compression ratio compared to stdlib is around level 3, but usually 3x as fast. 
Compared to cgo zstd, the speed is around level 3 (default), but compression slightly worse, between level 1&2. @@ -217,7 +218,8 @@ silesia.tar zstd 3 211947520 66793301 1377 146.79 As part of the development process a *Snappy* -> *Zstandard* converter was also built. -This can convert a *framed* [Snappy Stream](https://godoc.org/github.com/golang/snappy#Writer) to a zstd stream. Note that a single block is not framed. +This can convert a *framed* [Snappy Stream](https://godoc.org/github.com/golang/snappy#Writer) to a zstd stream. +Note that a single block is not framed. Conversion is done by converting the stream directly from Snappy without intermediate full decoding. Therefore the compression ratio is much less than what can be done by a full decompression diff --git a/zstd/decoder.go b/zstd/decoder.go index 2bd2a1300c..5a529a5690 100644 --- a/zstd/decoder.go +++ b/zstd/decoder.go @@ -289,9 +289,15 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { } // Allocation here: - br := byteBuf(input) + br := frame.bBuf + if br == nil { + br = &byteBuf{} + frame.bBuf = br + } + *br = input + for { - err := frame.reset(&br) + err := frame.reset(br) if err == io.EOF { return dst, nil } @@ -313,7 +319,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { if err != nil { return dst, err } - if len(br) == 0 { + if len(*br) == 0 { break } } diff --git a/zstd/framedec.go b/zstd/framedec.go index 7f1e225d72..4e4433dd5b 100644 --- a/zstd/framedec.go +++ b/zstd/framedec.go @@ -39,6 +39,9 @@ type frameDec struct { rawInput byteBuffer + // Optional byte buffer that can be reused. + bBuf *byteBuf + // asyncRunning indicates whether the async routine processes input on 'decoding'. asyncRunning bool asyncRunningMu sync.Mutex From eb1fb4feeb808b81e88369cd02e1235b3b016a41 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Sun, 28 Jul 2019 12:52:23 +0200 Subject: [PATCH 2/4] Simplify and remove input before stashing the frame decoder. 
--- zstd/decoder.go | 9 ++------- zstd/framedec.go | 2 +- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/zstd/decoder.go b/zstd/decoder.go index 5a529a5690..8d0a1c22a6 100644 --- a/zstd/decoder.go +++ b/zstd/decoder.go @@ -281,19 +281,14 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { } d.decoders <- block frame.rawInput = nil + frame.bBuf = nil d.frames <- frame }() if cap(dst) == 0 { // Allocate 1MB by default if nothing is provided. dst = make([]byte, 0, 1<<20) } - - // Allocation here: - br := frame.bBuf - if br == nil { - br = &byteBuf{} - frame.bBuf = br - } + br := &frame.bBuf *br = input for { diff --git a/zstd/framedec.go b/zstd/framedec.go index 4e4433dd5b..35c19b3936 100644 --- a/zstd/framedec.go +++ b/zstd/framedec.go @@ -40,7 +40,7 @@ type frameDec struct { rawInput byteBuffer // Optional byte buffer that can be reused. - bBuf *byteBuf + bBuf byteBuf // asyncRunning indicates whether the async routine processes input on 'decoding'. asyncRunning bool From de07644b895a1e800e98293cfd749a105b3ff4a1 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Sun, 28 Jul 2019 12:54:58 +0200 Subject: [PATCH 3/4] simplify even more. --- zstd/decoder.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/zstd/decoder.go b/zstd/decoder.go index 8d0a1c22a6..a32a34cb6c 100644 --- a/zstd/decoder.go +++ b/zstd/decoder.go @@ -284,15 +284,14 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { frame.bBuf = nil d.frames <- frame }() + frame.bBuf = input if cap(dst) == 0 { // Allocate 1MB by default if nothing is provided. 
dst = make([]byte, 0, 1<<20) } - br := &frame.bBuf - *br = input for { - err := frame.reset(br) + err := frame.reset(&frame.bBuf) if err == io.EOF { return dst, nil } @@ -314,7 +313,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { if err != nil { return dst, err } - if len(*br) == 0 { + if len(frame.bBuf) == 0 { break } } From e380902916f15b19b7b18cbc71c86d5cbeaf9528 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Sun, 28 Jul 2019 12:56:19 +0200 Subject: [PATCH 4/4] Doc: describe the purpose of frameDec.bBuf --- zstd/framedec.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zstd/framedec.go b/zstd/framedec.go index 35c19b3936..0c2a623074 100644 --- a/zstd/framedec.go +++ b/zstd/framedec.go @@ -39,7 +39,7 @@ type frameDec struct { rawInput byteBuffer - // Optional byte buffer that can be reused. + // bBuf holds the input passed to DecodeAll so no byteBuf is allocated per call; DecodeAll sets it to nil before returning the frame to d.frames. bBuf byteBuf // asyncRunning indicates whether the async routine processes input on 'decoding'.