Update dependencies and go1.18 (#1873)

* Update dependencies and go1.18

* Exclude unnecessary linters and update build to go1.18
This commit is contained in:
Wim
2022-08-13 16:14:26 +02:00
committed by GitHub
parent 3c4192ebf6
commit 6a3fc71397
396 changed files with 350176 additions and 309792 deletions

View File

@@ -17,6 +17,27 @@ This package provides various compression algorithms.
# changelog
* June 29, 2022 (v1.15.7)
* s2: Fix absolute forward seeks https://github.com/klauspost/compress/pull/633
* zip: Merge upstream https://github.com/klauspost/compress/pull/631
* zip: Re-add zip64 fix https://github.com/klauspost/compress/pull/624
* zstd: translate fseDecoder.buildDtable into asm by @WojciechMula in https://github.com/klauspost/compress/pull/598
* flate: Faster histograms https://github.com/klauspost/compress/pull/620
* deflate: Use compound hcode https://github.com/klauspost/compress/pull/622
* June 3, 2022 (v1.15.6)
* s2: Improve coding for long, close matches https://github.com/klauspost/compress/pull/613
* s2c: Add Snappy/S2 stream recompression https://github.com/klauspost/compress/pull/611
* zstd: Always use configured block size https://github.com/klauspost/compress/pull/605
* zstd: Fix incorrect hash table placement for dict encoding in default https://github.com/klauspost/compress/pull/606
* zstd: Apply default config to ZipDecompressor without options https://github.com/klauspost/compress/pull/608
* gzhttp: Exclude more common archive formats https://github.com/klauspost/compress/pull/612
* s2: Add ReaderIgnoreCRC https://github.com/klauspost/compress/pull/609
* s2: Remove sanity load on index creation https://github.com/klauspost/compress/pull/607
* snappy: Use dedicated function for scoring https://github.com/klauspost/compress/pull/614
* s2c+s2d: Use official snappy framed extension https://github.com/klauspost/compress/pull/610
* May 25, 2022 (v1.15.5)
* s2: Add concurrent stream decompression https://github.com/klauspost/compress/pull/602
* s2: Fix final emit oob read crash on amd64 https://github.com/klauspost/compress/pull/601

View File

@@ -27,10 +27,7 @@ func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
const fallback8BitSize = 800
type decompress4xContext struct {
pbr0 *bitReaderShifted
pbr1 *bitReaderShifted
pbr2 *bitReaderShifted
pbr3 *bitReaderShifted
pbr *[4]bitReaderShifted
peekBits uint8
out *byte
dstEvery int
@@ -89,10 +86,7 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
if len(out) > 4*4 && !(br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4) {
ctx := decompress4xContext{
pbr0: &br[0],
pbr1: &br[1],
pbr2: &br[2],
pbr3: &br[3],
pbr: &br,
peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast()
out: &out[0],
dstEvery: dstEvery,

View File

@@ -4,45 +4,40 @@
// +build amd64,!appengine,!noasm,gc
// func decompress4x_main_loop_amd64(ctx *decompress4xContext)
TEXT ·decompress4x_main_loop_amd64(SB), $8-8
TEXT ·decompress4x_main_loop_amd64(SB), $0-8
XORQ DX, DX
// Preload values
MOVQ ctx+0(FP), AX
MOVBQZX 32(AX), SI
MOVQ 40(AX), DI
MOVQ DI, BX
MOVQ 72(AX), CX
MOVQ CX, (SP)
MOVQ 48(AX), R8
MOVQ 56(AX), R9
MOVQ (AX), R10
MOVQ 8(AX), R11
MOVQ 16(AX), R12
MOVQ 24(AX), R13
MOVBQZX 8(AX), DI
MOVQ 16(AX), SI
MOVQ 48(AX), BX
MOVQ 24(AX), R9
MOVQ 32(AX), R10
MOVQ (AX), R11
// Main loop
main_loop:
MOVQ BX, DI
CMPQ DI, (SP)
MOVQ SI, R8
CMPQ R8, BX
SETGE DL
// br0.fillFast32()
MOVQ 32(R10), R14
MOVBQZX 40(R10), R15
CMPQ R15, $0x20
MOVQ 32(R11), R12
MOVBQZX 40(R11), R13
CMPQ R13, $0x20
JBE skip_fill0
MOVQ 24(R10), AX
SUBQ $0x20, R15
MOVQ 24(R11), AX
SUBQ $0x20, R13
SUBQ $0x04, AX
MOVQ (R10), BP
MOVQ (R11), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (AX)(BP*1), BP
MOVQ R15, CX
SHLQ CL, BP
MOVQ AX, 24(R10)
ORQ BP, R14
MOVL (AX)(R14*1), R14
MOVQ R13, CX
SHLQ CL, R14
MOVQ AX, 24(R11)
ORQ R14, R12
// exhausted = exhausted || (br0.off < 4)
CMPQ AX, $0x04
@@ -51,57 +46,57 @@ main_loop:
skip_fill0:
// val0 := br0.peekTopBits(peekBits)
MOVQ R14, BP
MOVQ SI, CX
SHRQ CL, BP
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v0 := table[val0&mask]
MOVW (R9)(BP*2), CX
MOVW (R10)(R14*2), CX
// br0.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R14
ADDB CL, R15
SHLQ CL, R12
ADDB CL, R13
// val1 := br0.peekTopBits(peekBits)
MOVQ SI, CX
MOVQ R14, BP
SHRQ CL, BP
MOVQ DI, CX
MOVQ R12, R14
SHRQ CL, R14
// v1 := table[val1&mask]
MOVW (R9)(BP*2), CX
MOVW (R10)(R14*2), CX
// br0.advance(uint8(v1.entry))
MOVB CH, AH
SHLQ CL, R14
ADDB CL, R15
SHLQ CL, R12
ADDB CL, R13
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
MOVW AX, (DI)
MOVW AX, (R8)
// update the bitrader reader structure
MOVQ R14, 32(R10)
MOVB R15, 40(R10)
ADDQ R8, DI
// update the bitreader structure
MOVQ R12, 32(R11)
MOVB R13, 40(R11)
ADDQ R9, R8
// br1.fillFast32()
MOVQ 32(R11), R14
MOVBQZX 40(R11), R15
CMPQ R15, $0x20
MOVQ 80(R11), R12
MOVBQZX 88(R11), R13
CMPQ R13, $0x20
JBE skip_fill1
MOVQ 24(R11), AX
SUBQ $0x20, R15
MOVQ 72(R11), AX
SUBQ $0x20, R13
SUBQ $0x04, AX
MOVQ (R11), BP
MOVQ 48(R11), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (AX)(BP*1), BP
MOVQ R15, CX
SHLQ CL, BP
MOVQ AX, 24(R11)
ORQ BP, R14
MOVL (AX)(R14*1), R14
MOVQ R13, CX
SHLQ CL, R14
MOVQ AX, 72(R11)
ORQ R14, R12
// exhausted = exhausted || (br1.off < 4)
CMPQ AX, $0x04
@@ -110,57 +105,57 @@ skip_fill0:
skip_fill1:
// val0 := br1.peekTopBits(peekBits)
MOVQ R14, BP
MOVQ SI, CX
SHRQ CL, BP
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v0 := table[val0&mask]
MOVW (R9)(BP*2), CX
MOVW (R10)(R14*2), CX
// br1.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R14
ADDB CL, R15
SHLQ CL, R12
ADDB CL, R13
// val1 := br1.peekTopBits(peekBits)
MOVQ SI, CX
MOVQ R14, BP
SHRQ CL, BP
MOVQ DI, CX
MOVQ R12, R14
SHRQ CL, R14
// v1 := table[val1&mask]
MOVW (R9)(BP*2), CX
MOVW (R10)(R14*2), CX
// br1.advance(uint8(v1.entry))
MOVB CH, AH
SHLQ CL, R14
ADDB CL, R15
SHLQ CL, R12
ADDB CL, R13
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
MOVW AX, (DI)
MOVW AX, (R8)
// update the bitrader reader structure
MOVQ R14, 32(R11)
MOVB R15, 40(R11)
ADDQ R8, DI
// update the bitreader structure
MOVQ R12, 80(R11)
MOVB R13, 88(R11)
ADDQ R9, R8
// br2.fillFast32()
MOVQ 32(R12), R14
MOVBQZX 40(R12), R15
CMPQ R15, $0x20
MOVQ 128(R11), R12
MOVBQZX 136(R11), R13
CMPQ R13, $0x20
JBE skip_fill2
MOVQ 24(R12), AX
SUBQ $0x20, R15
MOVQ 120(R11), AX
SUBQ $0x20, R13
SUBQ $0x04, AX
MOVQ (R12), BP
MOVQ 96(R11), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (AX)(BP*1), BP
MOVQ R15, CX
SHLQ CL, BP
MOVQ AX, 24(R12)
ORQ BP, R14
MOVL (AX)(R14*1), R14
MOVQ R13, CX
SHLQ CL, R14
MOVQ AX, 120(R11)
ORQ R14, R12
// exhausted = exhausted || (br2.off < 4)
CMPQ AX, $0x04
@@ -169,57 +164,57 @@ skip_fill1:
skip_fill2:
// val0 := br2.peekTopBits(peekBits)
MOVQ R14, BP
MOVQ SI, CX
SHRQ CL, BP
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v0 := table[val0&mask]
MOVW (R9)(BP*2), CX
MOVW (R10)(R14*2), CX
// br2.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R14
ADDB CL, R15
SHLQ CL, R12
ADDB CL, R13
// val1 := br2.peekTopBits(peekBits)
MOVQ SI, CX
MOVQ R14, BP
SHRQ CL, BP
MOVQ DI, CX
MOVQ R12, R14
SHRQ CL, R14
// v1 := table[val1&mask]
MOVW (R9)(BP*2), CX
MOVW (R10)(R14*2), CX
// br2.advance(uint8(v1.entry))
MOVB CH, AH
SHLQ CL, R14
ADDB CL, R15
SHLQ CL, R12
ADDB CL, R13
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
MOVW AX, (DI)
MOVW AX, (R8)
// update the bitrader reader structure
MOVQ R14, 32(R12)
MOVB R15, 40(R12)
ADDQ R8, DI
// update the bitreader structure
MOVQ R12, 128(R11)
MOVB R13, 136(R11)
ADDQ R9, R8
// br3.fillFast32()
MOVQ 32(R13), R14
MOVBQZX 40(R13), R15
CMPQ R15, $0x20
MOVQ 176(R11), R12
MOVBQZX 184(R11), R13
CMPQ R13, $0x20
JBE skip_fill3
MOVQ 24(R13), AX
SUBQ $0x20, R15
MOVQ 168(R11), AX
SUBQ $0x20, R13
SUBQ $0x04, AX
MOVQ (R13), BP
MOVQ 144(R11), R14
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (AX)(BP*1), BP
MOVQ R15, CX
SHLQ CL, BP
MOVQ AX, 24(R13)
ORQ BP, R14
MOVL (AX)(R14*1), R14
MOVQ R13, CX
SHLQ CL, R14
MOVQ AX, 168(R11)
ORQ R14, R12
// exhausted = exhausted || (br3.off < 4)
CMPQ AX, $0x04
@@ -228,149 +223,142 @@ skip_fill2:
skip_fill3:
// val0 := br3.peekTopBits(peekBits)
MOVQ R14, BP
MOVQ SI, CX
SHRQ CL, BP
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v0 := table[val0&mask]
MOVW (R9)(BP*2), CX
MOVW (R10)(R14*2), CX
// br3.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R14
ADDB CL, R15
SHLQ CL, R12
ADDB CL, R13
// val1 := br3.peekTopBits(peekBits)
MOVQ SI, CX
MOVQ R14, BP
SHRQ CL, BP
MOVQ DI, CX
MOVQ R12, R14
SHRQ CL, R14
// v1 := table[val1&mask]
MOVW (R9)(BP*2), CX
MOVW (R10)(R14*2), CX
// br3.advance(uint8(v1.entry))
MOVB CH, AH
SHLQ CL, R14
ADDB CL, R15
SHLQ CL, R12
ADDB CL, R13
// these two writes get coalesced
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
MOVW AX, (DI)
MOVW AX, (R8)
// update the bitrader reader structure
MOVQ R14, 32(R13)
MOVB R15, 40(R13)
ADDQ $0x02, BX
// update the bitreader structure
MOVQ R12, 176(R11)
MOVB R13, 184(R11)
ADDQ $0x02, SI
TESTB DL, DL
JZ main_loop
MOVQ ctx+0(FP), AX
MOVQ 40(AX), CX
MOVQ BX, DX
SUBQ CX, DX
SHLQ $0x02, DX
MOVQ DX, 64(AX)
SUBQ 16(AX), SI
SHLQ $0x02, SI
MOVQ SI, 40(AX)
RET
// func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
TEXT ·decompress4x_8b_main_loop_amd64(SB), $16-8
TEXT ·decompress4x_8b_main_loop_amd64(SB), $0-8
XORQ DX, DX
// Preload values
MOVQ ctx+0(FP), CX
MOVBQZX 32(CX), BX
MOVQ 40(CX), SI
MOVQ SI, (SP)
MOVQ 72(CX), DX
MOVQ DX, 8(SP)
MOVQ 48(CX), DI
MOVQ 56(CX), R8
MOVQ (CX), R9
MOVQ 8(CX), R10
MOVQ 16(CX), R11
MOVQ 24(CX), R12
MOVBQZX 8(CX), DI
MOVQ 16(CX), BX
MOVQ 48(CX), SI
MOVQ 24(CX), R9
MOVQ 32(CX), R10
MOVQ (CX), R11
// Main loop
main_loop:
MOVQ (SP), SI
CMPQ SI, 8(SP)
MOVQ BX, R8
CMPQ R8, SI
SETGE DL
// br1000.fillFast32()
MOVQ 32(R9), R13
MOVBQZX 40(R9), R14
CMPQ R14, $0x20
JBE skip_fill1000
MOVQ 24(R9), R15
SUBQ $0x20, R14
SUBQ $0x04, R15
MOVQ (R9), BP
// br0.fillFast32()
MOVQ 32(R11), R12
MOVBQZX 40(R11), R13
CMPQ R13, $0x20
JBE skip_fill0
MOVQ 24(R11), R14
SUBQ $0x20, R13
SUBQ $0x04, R14
MOVQ (R11), R15
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (R15)(BP*1), BP
MOVQ R14, CX
SHLQ CL, BP
MOVQ R15, 24(R9)
ORQ BP, R13
MOVL (R14)(R15*1), R15
MOVQ R13, CX
SHLQ CL, R15
MOVQ R14, 24(R11)
ORQ R15, R12
// exhausted = exhausted || (br1000.off < 4)
CMPQ R15, $0x04
// exhausted = exhausted || (br0.off < 4)
CMPQ R14, $0x04
SETLT AL
ORB AL, DL
skip_fill1000:
skip_fill0:
// val0 := br0.peekTopBits(peekBits)
MOVQ R13, R15
MOVQ BX, CX
SHRQ CL, R15
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v0 := table[val0&mask]
MOVW (R8)(R15*2), CX
MOVW (R10)(R14*2), CX
// br0.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R13
ADDB CL, R14
SHLQ CL, R12
ADDB CL, R13
// val1 := br0.peekTopBits(peekBits)
MOVQ R13, R15
MOVQ BX, CX
SHRQ CL, R15
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v1 := table[val0&mask]
MOVW (R8)(R15*2), CX
MOVW (R10)(R14*2), CX
// br0.advance(uint8(v1.entry)
MOVB CH, AH
SHLQ CL, R13
ADDB CL, R14
SHLQ CL, R12
ADDB CL, R13
BSWAPL AX
// val2 := br0.peekTopBits(peekBits)
MOVQ R13, R15
MOVQ BX, CX
SHRQ CL, R15
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v2 := table[val0&mask]
MOVW (R8)(R15*2), CX
MOVW (R10)(R14*2), CX
// br0.advance(uint8(v2.entry)
MOVB CH, AH
SHLQ CL, R13
ADDB CL, R14
SHLQ CL, R12
ADDB CL, R13
// val3 := br0.peekTopBits(peekBits)
MOVQ R13, R15
MOVQ BX, CX
SHRQ CL, R15
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v3 := table[val0&mask]
MOVW (R8)(R15*2), CX
MOVW (R10)(R14*2), CX
// br0.advance(uint8(v3.entry)
MOVB CH, AL
SHLQ CL, R13
ADDB CL, R14
SHLQ CL, R12
ADDB CL, R13
BSWAPL AX
// these four writes get coalesced
@@ -378,88 +366,88 @@ skip_fill1000:
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
MOVL AX, (SI)
MOVL AX, (R8)
// update the bitreader reader structure
MOVQ R13, 32(R9)
MOVB R14, 40(R9)
ADDQ DI, SI
// update the bitreader structure
MOVQ R12, 32(R11)
MOVB R13, 40(R11)
ADDQ R9, R8
// br1001.fillFast32()
MOVQ 32(R10), R13
MOVBQZX 40(R10), R14
CMPQ R14, $0x20
JBE skip_fill1001
MOVQ 24(R10), R15
SUBQ $0x20, R14
SUBQ $0x04, R15
MOVQ (R10), BP
// br1.fillFast32()
MOVQ 80(R11), R12
MOVBQZX 88(R11), R13
CMPQ R13, $0x20
JBE skip_fill1
MOVQ 72(R11), R14
SUBQ $0x20, R13
SUBQ $0x04, R14
MOVQ 48(R11), R15
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (R15)(BP*1), BP
MOVQ R14, CX
SHLQ CL, BP
MOVQ R15, 24(R10)
ORQ BP, R13
MOVL (R14)(R15*1), R15
MOVQ R13, CX
SHLQ CL, R15
MOVQ R14, 72(R11)
ORQ R15, R12
// exhausted = exhausted || (br1001.off < 4)
CMPQ R15, $0x04
// exhausted = exhausted || (br1.off < 4)
CMPQ R14, $0x04
SETLT AL
ORB AL, DL
skip_fill1001:
skip_fill1:
// val0 := br1.peekTopBits(peekBits)
MOVQ R13, R15
MOVQ BX, CX
SHRQ CL, R15
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v0 := table[val0&mask]
MOVW (R8)(R15*2), CX
MOVW (R10)(R14*2), CX
// br1.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R13
ADDB CL, R14
SHLQ CL, R12
ADDB CL, R13
// val1 := br1.peekTopBits(peekBits)
MOVQ R13, R15
MOVQ BX, CX
SHRQ CL, R15
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v1 := table[val0&mask]
MOVW (R8)(R15*2), CX
MOVW (R10)(R14*2), CX
// br1.advance(uint8(v1.entry)
MOVB CH, AH
SHLQ CL, R13
ADDB CL, R14
SHLQ CL, R12
ADDB CL, R13
BSWAPL AX
// val2 := br1.peekTopBits(peekBits)
MOVQ R13, R15
MOVQ BX, CX
SHRQ CL, R15
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v2 := table[val0&mask]
MOVW (R8)(R15*2), CX
MOVW (R10)(R14*2), CX
// br1.advance(uint8(v2.entry)
MOVB CH, AH
SHLQ CL, R13
ADDB CL, R14
SHLQ CL, R12
ADDB CL, R13
// val3 := br1.peekTopBits(peekBits)
MOVQ R13, R15
MOVQ BX, CX
SHRQ CL, R15
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v3 := table[val0&mask]
MOVW (R8)(R15*2), CX
MOVW (R10)(R14*2), CX
// br1.advance(uint8(v3.entry)
MOVB CH, AL
SHLQ CL, R13
ADDB CL, R14
SHLQ CL, R12
ADDB CL, R13
BSWAPL AX
// these four writes get coalesced
@@ -467,88 +455,88 @@ skip_fill1001:
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
MOVL AX, (SI)
MOVL AX, (R8)
// update the bitreader reader structure
MOVQ R13, 32(R10)
MOVB R14, 40(R10)
ADDQ DI, SI
// update the bitreader structure
MOVQ R12, 80(R11)
MOVB R13, 88(R11)
ADDQ R9, R8
// br1002.fillFast32()
MOVQ 32(R11), R13
MOVBQZX 40(R11), R14
CMPQ R14, $0x20
JBE skip_fill1002
MOVQ 24(R11), R15
SUBQ $0x20, R14
SUBQ $0x04, R15
MOVQ (R11), BP
// br2.fillFast32()
MOVQ 128(R11), R12
MOVBQZX 136(R11), R13
CMPQ R13, $0x20
JBE skip_fill2
MOVQ 120(R11), R14
SUBQ $0x20, R13
SUBQ $0x04, R14
MOVQ 96(R11), R15
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (R15)(BP*1), BP
MOVQ R14, CX
SHLQ CL, BP
MOVQ R15, 24(R11)
ORQ BP, R13
MOVL (R14)(R15*1), R15
MOVQ R13, CX
SHLQ CL, R15
MOVQ R14, 120(R11)
ORQ R15, R12
// exhausted = exhausted || (br1002.off < 4)
CMPQ R15, $0x04
// exhausted = exhausted || (br2.off < 4)
CMPQ R14, $0x04
SETLT AL
ORB AL, DL
skip_fill1002:
skip_fill2:
// val0 := br2.peekTopBits(peekBits)
MOVQ R13, R15
MOVQ BX, CX
SHRQ CL, R15
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v0 := table[val0&mask]
MOVW (R8)(R15*2), CX
MOVW (R10)(R14*2), CX
// br2.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R13
ADDB CL, R14
SHLQ CL, R12
ADDB CL, R13
// val1 := br2.peekTopBits(peekBits)
MOVQ R13, R15
MOVQ BX, CX
SHRQ CL, R15
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v1 := table[val0&mask]
MOVW (R8)(R15*2), CX
MOVW (R10)(R14*2), CX
// br2.advance(uint8(v1.entry)
MOVB CH, AH
SHLQ CL, R13
ADDB CL, R14
SHLQ CL, R12
ADDB CL, R13
BSWAPL AX
// val2 := br2.peekTopBits(peekBits)
MOVQ R13, R15
MOVQ BX, CX
SHRQ CL, R15
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v2 := table[val0&mask]
MOVW (R8)(R15*2), CX
MOVW (R10)(R14*2), CX
// br2.advance(uint8(v2.entry)
MOVB CH, AH
SHLQ CL, R13
ADDB CL, R14
SHLQ CL, R12
ADDB CL, R13
// val3 := br2.peekTopBits(peekBits)
MOVQ R13, R15
MOVQ BX, CX
SHRQ CL, R15
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v3 := table[val0&mask]
MOVW (R8)(R15*2), CX
MOVW (R10)(R14*2), CX
// br2.advance(uint8(v3.entry)
MOVB CH, AL
SHLQ CL, R13
ADDB CL, R14
SHLQ CL, R12
ADDB CL, R13
BSWAPL AX
// these four writes get coalesced
@@ -556,88 +544,88 @@ skip_fill1002:
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
MOVL AX, (SI)
MOVL AX, (R8)
// update the bitreader reader structure
MOVQ R13, 32(R11)
MOVB R14, 40(R11)
ADDQ DI, SI
// update the bitreader structure
MOVQ R12, 128(R11)
MOVB R13, 136(R11)
ADDQ R9, R8
// br1003.fillFast32()
MOVQ 32(R12), R13
MOVBQZX 40(R12), R14
CMPQ R14, $0x20
JBE skip_fill1003
MOVQ 24(R12), R15
SUBQ $0x20, R14
SUBQ $0x04, R15
MOVQ (R12), BP
// br3.fillFast32()
MOVQ 176(R11), R12
MOVBQZX 184(R11), R13
CMPQ R13, $0x20
JBE skip_fill3
MOVQ 168(R11), R14
SUBQ $0x20, R13
SUBQ $0x04, R14
MOVQ 144(R11), R15
// b.value |= uint64(low) << (b.bitsRead & 63)
MOVL (R15)(BP*1), BP
MOVQ R14, CX
SHLQ CL, BP
MOVQ R15, 24(R12)
ORQ BP, R13
MOVL (R14)(R15*1), R15
MOVQ R13, CX
SHLQ CL, R15
MOVQ R14, 168(R11)
ORQ R15, R12
// exhausted = exhausted || (br1003.off < 4)
CMPQ R15, $0x04
// exhausted = exhausted || (br3.off < 4)
CMPQ R14, $0x04
SETLT AL
ORB AL, DL
skip_fill1003:
skip_fill3:
// val0 := br3.peekTopBits(peekBits)
MOVQ R13, R15
MOVQ BX, CX
SHRQ CL, R15
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v0 := table[val0&mask]
MOVW (R8)(R15*2), CX
MOVW (R10)(R14*2), CX
// br3.advance(uint8(v0.entry)
MOVB CH, AL
SHLQ CL, R13
ADDB CL, R14
SHLQ CL, R12
ADDB CL, R13
// val1 := br3.peekTopBits(peekBits)
MOVQ R13, R15
MOVQ BX, CX
SHRQ CL, R15
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v1 := table[val0&mask]
MOVW (R8)(R15*2), CX
MOVW (R10)(R14*2), CX
// br3.advance(uint8(v1.entry)
MOVB CH, AH
SHLQ CL, R13
ADDB CL, R14
SHLQ CL, R12
ADDB CL, R13
BSWAPL AX
// val2 := br3.peekTopBits(peekBits)
MOVQ R13, R15
MOVQ BX, CX
SHRQ CL, R15
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v2 := table[val0&mask]
MOVW (R8)(R15*2), CX
MOVW (R10)(R14*2), CX
// br3.advance(uint8(v2.entry)
MOVB CH, AH
SHLQ CL, R13
ADDB CL, R14
SHLQ CL, R12
ADDB CL, R13
// val3 := br3.peekTopBits(peekBits)
MOVQ R13, R15
MOVQ BX, CX
SHRQ CL, R15
MOVQ R12, R14
MOVQ DI, CX
SHRQ CL, R14
// v3 := table[val0&mask]
MOVW (R8)(R15*2), CX
MOVW (R10)(R14*2), CX
// br3.advance(uint8(v3.entry)
MOVB CH, AL
SHLQ CL, R13
ADDB CL, R14
SHLQ CL, R12
ADDB CL, R13
BSWAPL AX
// these four writes get coalesced
@@ -645,20 +633,18 @@ skip_fill1003:
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
MOVL AX, (SI)
MOVL AX, (R8)
// update the bitreader reader structure
MOVQ R13, 32(R12)
MOVB R14, 40(R12)
ADDQ $0x04, (SP)
// update the bitreader structure
MOVQ R12, 176(R11)
MOVB R13, 184(R11)
ADDQ $0x04, BX
TESTB DL, DL
JZ main_loop
MOVQ ctx+0(FP), AX
MOVQ 40(AX), CX
MOVQ (SP), DX
SUBQ CX, DX
SHLQ $0x02, DX
MOVQ DX, 64(AX)
SUBQ 16(AX), BX
SHLQ $0x02, BX
MOVQ BX, 40(AX)
RET
// func decompress1x_main_loop_amd64(ctx *decompress1xContext)
@@ -750,10 +736,8 @@ loop_condition:
// Update ctx structure
MOVQ ctx+0(FP), AX
MOVQ DX, CX
MOVQ 16(AX), DX
SUBQ DX, CX
MOVQ CX, 40(AX)
SUBQ 16(AX), DX
MOVQ DX, 40(AX)
MOVQ (AX), AX
MOVQ R9, 24(AX)
MOVQ R10, 32(AX)
@@ -847,10 +831,8 @@ loop_condition:
// Update ctx structure
MOVQ ctx+0(FP), AX
MOVQ DX, CX
MOVQ 16(AX), DX
SUBQ DX, CX
MOVQ CX, 40(AX)
SUBQ 16(AX), DX
MOVQ DX, 40(AX)
MOVQ (AX), AX
MOVQ R9, 24(AX)
MOVQ R10, 32(AX)

View File

@@ -791,6 +791,7 @@ func (r *Reader) Skip(n int64) error {
} else {
// Skip block completely
n -= int64(dLen)
r.blockStart += int64(dLen)
dLen = 0
}
r.i, r.j = 0, dLen
@@ -921,6 +922,15 @@ func (r *Reader) ReadSeeker(random bool, index []byte) (*ReadSeeker, error) {
err = r.index.LoadStream(rs)
if err != nil {
if err == ErrUnsupported {
// If we don't require random seeking, reset input and return.
if !random {
_, err = rs.Seek(pos, io.SeekStart)
if err != nil {
return nil, ErrCantSeek{Reason: "resetting stream returned: " + err.Error()}
}
r.index = nil
return &ReadSeeker{Reader: r}, nil
}
return nil, ErrCantSeek{Reason: "input stream does not contain an index"}
}
return nil, ErrCantSeek{Reason: "reading index returned: " + err.Error()}

View File

@@ -533,3 +533,66 @@ func (i *Index) JSON() []byte {
b, _ := json.MarshalIndent(x, "", " ")
return b
}
// RemoveIndexHeaders will trim all headers and trailers from a given index.
// This is expected to save 20 bytes.
// These can be restored using RestoreIndexHeaders.
// This removes a layer of security, but is the most compact representation.
// Returns nil if headers contains errors.
// The returned slice references the provided slice.
func RemoveIndexHeaders(b []byte) []byte {
const save = 4 + len(S2IndexHeader) + len(S2IndexTrailer) + 4
if len(b) <= save {
return nil
}
if b[0] != ChunkTypeIndex {
return nil
}
chunkLen := int(b[1]) | int(b[2])<<8 | int(b[3])<<16
b = b[4:]
// Validate we have enough...
if len(b) < chunkLen {
return nil
}
b = b[:chunkLen]
if !bytes.Equal(b[:len(S2IndexHeader)], []byte(S2IndexHeader)) {
return nil
}
b = b[len(S2IndexHeader):]
if !bytes.HasSuffix(b, []byte(S2IndexTrailer)) {
return nil
}
b = bytes.TrimSuffix(b, []byte(S2IndexTrailer))
if len(b) < 4 {
return nil
}
return b[:len(b)-4]
}
// RestoreIndexHeaders will index restore headers removed by RemoveIndexHeaders.
// No error checking is performed on the input.
// If a 0 length slice is sent, it is returned without modification.
func RestoreIndexHeaders(in []byte) []byte {
if len(in) == 0 {
return in
}
b := make([]byte, 0, 4+len(S2IndexHeader)+len(in)+len(S2IndexTrailer)+4)
b = append(b, ChunkTypeIndex, 0, 0, 0)
b = append(b, []byte(S2IndexHeader)...)
b = append(b, in...)
var tmp [4]byte
binary.LittleEndian.PutUint32(tmp[:], uint32(len(b)+4+len(S2IndexTrailer)))
b = append(b, tmp[:4]...)
// Trailer
b = append(b, []byte(S2IndexTrailer)...)
chunkLen := len(b) - skippableFrameHeader
b[1] = uint8(chunkLen >> 0)
b[2] = uint8(chunkLen >> 8)
b[3] = uint8(chunkLen >> 16)
return b
}

View File

@@ -180,7 +180,6 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
return fmt.Errorf("corruption detected (total %d != %d)", gotTotal, 1<<s.actualTableLog)
}
b.advance((bitCount + 7) >> 3)
// println(s.norm[:s.symbolLen], s.symbolLen)
return s.buildDtable()
}
@@ -269,68 +268,6 @@ func (s *fseDecoder) setRLE(symbol decSymbol) {
s.dt[0] = symbol
}
// buildDtable will build the decoding table.
func (s *fseDecoder) buildDtable() error {
tableSize := uint32(1 << s.actualTableLog)
highThreshold := tableSize - 1
symbolNext := s.stateTable[:256]
// Init, lay down lowprob symbols
{
for i, v := range s.norm[:s.symbolLen] {
if v == -1 {
s.dt[highThreshold].setAddBits(uint8(i))
highThreshold--
symbolNext[i] = 1
} else {
symbolNext[i] = uint16(v)
}
}
}
// Spread symbols
{
tableMask := tableSize - 1
step := tableStep(tableSize)
position := uint32(0)
for ss, v := range s.norm[:s.symbolLen] {
for i := 0; i < int(v); i++ {
s.dt[position].setAddBits(uint8(ss))
position = (position + step) & tableMask
for position > highThreshold {
// lowprob area
position = (position + step) & tableMask
}
}
}
if position != 0 {
// position must reach all cells once, otherwise normalizedCounter is incorrect
return errors.New("corrupted input (position != 0)")
}
}
// Build Decoding table
{
tableSize := uint16(1 << s.actualTableLog)
for u, v := range s.dt[:tableSize] {
symbol := v.addBits()
nextState := symbolNext[symbol]
symbolNext[symbol] = nextState + 1
nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
s.dt[u&maxTableMask].setNBits(nBits)
newState := (nextState << nBits) - tableSize
if newState > tableSize {
return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
}
if newState == uint16(u) && nBits == 0 {
// Seems weird that this is possible with nbits > 0.
return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
}
s.dt[u&maxTableMask].setNewState(newState)
}
}
return nil
}
// transform will transform the decoder table into a table usable for
// decoding without having to apply the transformation while decoding.
// The state will contain the base value and the number of bits to read.

View File

@@ -0,0 +1,64 @@
//go:build amd64 && !appengine && !noasm && gc
// +build amd64,!appengine,!noasm,gc
package zstd
import (
"fmt"
)
type buildDtableAsmContext struct {
// inputs
stateTable *uint16
norm *int16
dt *uint64
// outputs --- set by the procedure in the case of error;
// for interpretation please see the error handling part below
errParam1 uint64
errParam2 uint64
}
// buildDtable_asm is an x86 assembly implementation of fseDecoder.buildDtable.
// Function returns non-zero exit code on error.
// go:noescape
func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
// please keep in sync with _generate/gen_fse.go
const (
errorCorruptedNormalizedCounter = 1
errorNewStateTooBig = 2
errorNewStateNoBits = 3
)
// buildDtable will build the decoding table.
func (s *fseDecoder) buildDtable() error {
ctx := buildDtableAsmContext{
stateTable: (*uint16)(&s.stateTable[0]),
norm: (*int16)(&s.norm[0]),
dt: (*uint64)(&s.dt[0]),
}
code := buildDtable_asm(s, &ctx)
if code != 0 {
switch code {
case errorCorruptedNormalizedCounter:
position := ctx.errParam1
return fmt.Errorf("corrupted input (position=%d, expected 0)", position)
case errorNewStateTooBig:
newState := decSymbol(ctx.errParam1)
size := ctx.errParam2
return fmt.Errorf("newState (%d) outside table size (%d)", newState, size)
case errorNewStateNoBits:
newState := decSymbol(ctx.errParam1)
oldState := decSymbol(ctx.errParam2)
return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, oldState)
default:
return fmt.Errorf("buildDtable_asm returned unhandled nonzero code = %d", code)
}
}
return nil
}

View File

@@ -0,0 +1,127 @@
// Code generated by command: go run gen_fse.go -out ../fse_decoder_amd64.s -pkg=zstd. DO NOT EDIT.
//go:build !appengine && !noasm && gc && !noasm
// +build !appengine,!noasm,gc,!noasm
// func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
TEXT ·buildDtable_asm(SB), $0-24
MOVQ ctx+8(FP), CX
MOVQ s+0(FP), DI
// Load values
MOVBQZX 4098(DI), DX
XORQ AX, AX
BTSQ DX, AX
MOVQ (CX), BX
MOVQ 16(CX), SI
LEAQ -1(AX), R8
MOVQ 8(CX), CX
MOVWQZX 4096(DI), DI
// End load values
// Init, lay down lowprob symbols
XORQ R9, R9
JMP init_main_loop_condition
init_main_loop:
MOVWQSX (CX)(R9*2), R10
CMPW R10, $-1
JNE do_not_update_high_threshold
MOVB R9, 1(SI)(R8*8)
DECQ R8
MOVQ $0x0000000000000001, R10
do_not_update_high_threshold:
MOVW R10, (BX)(R9*2)
INCQ R9
init_main_loop_condition:
CMPQ R9, DI
JL init_main_loop
// Spread symbols
// Calculate table step
MOVQ AX, R9
SHRQ $0x01, R9
MOVQ AX, R10
SHRQ $0x03, R10
LEAQ 3(R9)(R10*1), R9
// Fill add bits values
LEAQ -1(AX), R10
XORQ R11, R11
XORQ R12, R12
JMP spread_main_loop_condition
spread_main_loop:
XORQ R13, R13
MOVWQSX (CX)(R12*2), R14
JMP spread_inner_loop_condition
spread_inner_loop:
MOVB R12, 1(SI)(R11*8)
adjust_position:
ADDQ R9, R11
ANDQ R10, R11
CMPQ R11, R8
JG adjust_position
INCQ R13
spread_inner_loop_condition:
CMPQ R13, R14
JL spread_inner_loop
INCQ R12
spread_main_loop_condition:
CMPQ R12, DI
JL spread_main_loop
TESTQ R11, R11
JZ spread_check_ok
MOVQ ctx+8(FP), AX
MOVQ R11, 24(AX)
MOVQ $+1, ret+16(FP)
RET
spread_check_ok:
// Build Decoding table
XORQ DI, DI
build_table_main_table:
MOVBQZX 1(SI)(DI*8), CX
MOVWQZX (BX)(CX*2), R8
LEAQ 1(R8), R9
MOVW R9, (BX)(CX*2)
MOVQ R8, R9
BSRQ R9, R9
MOVQ DX, CX
SUBQ R9, CX
SHLQ CL, R8
SUBQ AX, R8
MOVB CL, (SI)(DI*8)
MOVW R8, 2(SI)(DI*8)
CMPQ R8, AX
JLE build_table_check1_ok
MOVQ ctx+8(FP), CX
MOVQ R8, 24(CX)
MOVQ AX, 32(CX)
MOVQ $+2, ret+16(FP)
RET
build_table_check1_ok:
TESTB CL, CL
JNZ build_table_check2_ok
CMPW R8, DI
JNE build_table_check2_ok
MOVQ ctx+8(FP), AX
MOVQ R8, 24(AX)
MOVQ DI, 32(AX)
MOVQ $+3, ret+16(FP)
RET
build_table_check2_ok:
INCQ DI
CMPQ DI, AX
JL build_table_main_table
MOVQ $+0, ret+16(FP)
RET

View File

@@ -0,0 +1,72 @@
//go:build !amd64 || appengine || !gc || noasm
// +build !amd64 appengine !gc noasm
package zstd
import (
"errors"
"fmt"
)
// buildDtable will build the decoding table.
func (s *fseDecoder) buildDtable() error {
tableSize := uint32(1 << s.actualTableLog)
highThreshold := tableSize - 1
symbolNext := s.stateTable[:256]
// Init, lay down lowprob symbols
{
for i, v := range s.norm[:s.symbolLen] {
if v == -1 {
s.dt[highThreshold].setAddBits(uint8(i))
highThreshold--
symbolNext[i] = 1
} else {
symbolNext[i] = uint16(v)
}
}
}
// Spread symbols
{
tableMask := tableSize - 1
step := tableStep(tableSize)
position := uint32(0)
for ss, v := range s.norm[:s.symbolLen] {
for i := 0; i < int(v); i++ {
s.dt[position].setAddBits(uint8(ss))
position = (position + step) & tableMask
for position > highThreshold {
// lowprob area
position = (position + step) & tableMask
}
}
}
if position != 0 {
// position must reach all cells once, otherwise normalizedCounter is incorrect
return errors.New("corrupted input (position != 0)")
}
}
// Build Decoding table
{
tableSize := uint16(1 << s.actualTableLog)
for u, v := range s.dt[:tableSize] {
symbol := v.addBits()
nextState := symbolNext[symbol]
symbolNext[symbol] = nextState + 1
nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
s.dt[u&maxTableMask].setNBits(nBits)
newState := (nextState << nBits) - tableSize
if newState > tableSize {
return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
}
if newState == uint16(u) && nBits == 0 {
// Seems weird that this is possible with nbits > 0.
return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
}
s.dt[u&maxTableMask].setNewState(newState)
}
}
return nil
}

File diff suppressed because it is too large Load Diff