feat: Waku v2 bridge

Issue #12610
2023-11-12 13:29:38 +01:00
parent 56e7bd01ca
commit 6d31343205
6716 changed files with 1982502 additions and 5891 deletions
--- a/vendor/github.com/klauspost/compress/zstd/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/README.md
@@ -304,7 +304,7 @@ import "github.com/klauspost/compress/zstd"

 // Create a reader that caches decompressors.
 // For this operation type we supply a nil Reader.
-var decoder, _ = zstd.NewReader(nil, WithDecoderConcurrency(0))
+var decoder, _ = zstd.NewReader(nil, zstd.WithDecoderConcurrency(0))

 // Decompress a buffer. We don't supply a destination buffer,
 // so it will be allocated by the decoder.
--- a/vendor/github.com/klauspost/compress/zstd/blockdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@@ -9,6 +9,7 @@ import (
 	"encoding/binary"
 	"errors"
 	"fmt"
+	"hash/crc32"
 	"io"
 	"os"
 	"path/filepath"
@@ -442,6 +443,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
 			}
 		}
 		var err error
+		if debugDecoder {
+			println("huff table input:", len(literals), "CRC:", crc32.ChecksumIEEE(literals))
+		}
 		huff, literals, err = huff0.ReadTable(literals, huff)
 		if err != nil {
 			println("reading huffman table:", err)
@@ -588,7 +592,7 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
 				}
 				seq.fse.setRLE(symb)
 				if debugDecoder {
-					printf("RLE set to %+v, code: %v", symb, v)
+					printf("RLE set to 0x%x, code: %v", symb, v)
 				}
 			case compModeFSE:
 				println("Reading table for", tableIndex(i))
--- a/vendor/github.com/klauspost/compress/zstd/blockenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go
@@ -473,7 +473,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 		return b.encodeLits(b.literals, rawAllLits)
 	}
 	// We want some difference to at least account for the headers.
-	saved := b.size - len(b.literals) - (b.size >> 5)
+	saved := b.size - len(b.literals) - (b.size >> 6)
 	if saved < 16 {
 		if org == nil {
 			return errIncompressible
@@ -779,10 +779,13 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 	}
 	b.output = wr.out

+	// Maybe even add a bigger margin.
 	if len(b.output)-3-bhOffset >= b.size {
-		// Maybe even add a bigger margin.
+		// Discard and encode as raw block.
+		b.output = b.encodeRawTo(b.output[:bhOffset], org)
+		b.popOffsets()
 		b.litEnc.Reuse = huff0.ReusePolicyNone
-		return errIncompressible
+		return nil
 	}

 	// Size is output minus block header.
--- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go
+++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
@@ -54,7 +54,7 @@ func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
 func (b *byteBuf) readByte() (byte, error) {
 	bb := *b
 	if len(bb) < 1 {
-		return 0, nil
+		return 0, io.ErrUnexpectedEOF
 	}
 	r := bb[0]
 	*b = bb[1:]
@@ -109,7 +109,7 @@ func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) {
 }

 func (r *readerWrapper) readByte() (byte, error) {
-	n2, err := r.r.Read(r.tmp[:1])
+	n2, err := io.ReadFull(r.r, r.tmp[:1])
 	if err != nil {
 		if err == io.EOF {
 			err = io.ErrUnexpectedEOF
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@@ -455,12 +455,7 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
 	}

 	if len(next.b) > 0 {
-		n, err := d.current.crc.Write(next.b)
-		if err == nil {
-			if n != len(next.b) {
-				d.current.err = io.ErrShortWrite
-			}
-		}
+		d.current.crc.Write(next.b)
 	}
 	if next.err == nil && next.d != nil && next.d.hasCRC {
 		got := uint32(d.current.crc.Sum64())
--- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go
@@ -107,7 +107,7 @@ func WithDecoderDicts(dicts ...[]byte) DOption {
 	}
 }

-// WithEncoderDictRaw registers a dictionary that may be used by the decoder.
+// WithDecoderDictRaw registers a dictionary that may be used by the decoder.
 // The slice content can be arbitrary data.
 func WithDecoderDictRaw(id uint32, content []byte) DOption {
 	return func(o *decoderOptions) error {
--- a/vendor/github.com/klauspost/compress/zstd/enc_base.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go
@@ -144,6 +144,7 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) {
 	} else {
 		e.crc.Reset()
 	}
+	e.blk.dictLitEnc = nil
 	if d != nil {
 		low := e.lowMem
 		if singleBlock {
--- a/vendor/github.com/klauspost/compress/zstd/enc_best.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go
@@ -32,10 +32,9 @@ type match struct {
 	length int32
 	rep    int32
 	est    int32
-	_      [12]byte // Aligned size to cache line: 4+4+4+4+4 bytes + 12 bytes padding = 32 bytes
 }

-const highScore = 25000
+const highScore = maxMatchLen * 8

 // estBits will estimate output bits from predefined tables.
 func (m *match) estBits(bitsPerByte int32) {
@@ -160,7 +159,6 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {

 	// nextEmit is where in src the next emitLiteral should start from.
 	nextEmit := s
-	cv := load6432(src, s)

 	// Relative offsets
 	offset1 := int32(blk.recentOffsets[0])
@@ -174,7 +172,6 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
 		blk.literals = append(blk.literals, src[nextEmit:until]...)
 		s.litLen = uint32(until - nextEmit)
 	}
-	_ = addLiterals

 	if debugEncoder {
 		println("recent offsets:", blk.recentOffsets)
@@ -189,53 +186,96 @@ encodeLoop:
 			panic("offset0 was 0")
 		}

-		bestOf := func(a, b *match) *match {
-			if a.est-b.est+(a.s-b.s)*bitsPerByte>>10 < 0 {
-				return a
-			}
-			return b
-		}
-		const goodEnough = 100
+		const goodEnough = 250
+
+		cv := load6432(src, s)

 		nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
 		nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
 		candidateL := e.longTable[nextHashL]
 		candidateS := e.table[nextHashS]

-		matchAt := func(offset int32, s int32, first uint32, rep int32) match {
+		// Set m to a match at offset if it looks like that will improve compression.
+		improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
 			if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
-				return match{s: s, est: highScore}
+				return
 			}
 			if debugAsserts {
+				if offset <= 0 {
+					panic(offset)
+				}
 				if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
 					panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
 				}
 			}
-			m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
-			m.estBits(bitsPerByte)
-			return m
+			// Try to quick reject if we already have a long match.
+			if m.length > 16 {
+				left := len(src) - int(m.s+m.length)
+				// If we are too close to the end, keep as is.
+				if left <= 0 {
+					return
+				}
+				checkLen := m.length - (s - m.s) - 8
+				if left > 2 && checkLen > 4 {
+					// Check 4 bytes, 4 bytes from the end of the current match.
+					a := load3232(src, offset+checkLen)
+					b := load3232(src, s+checkLen)
+					if a != b {
+						return
+					}
+				}
+			}
+			l := 4 + e.matchlen(s+4, offset+4, src)
+			if rep < 0 {
+				// Extend candidate match backwards as far as possible.
+				tMin := s - e.maxMatchOff
+				if tMin < 0 {
+					tMin = 0
+				}
+				for offset > tMin && s > nextEmit && src[offset-1] == src[s-1] && l < maxMatchLength {
+					s--
+					offset--
+					l++
+				}
+			}
+
+			cand := match{offset: offset, s: s, length: l, rep: rep}
+			cand.estBits(bitsPerByte)
+			if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
+				*m = cand
+			}
 		}

-		m1 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)
-		m2 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)
-		m3 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)
-		m4 := matchAt(candidateS.prev-e.cur, s, uint32(cv), -1)
-		best := bestOf(bestOf(&m1, &m2), bestOf(&m3, &m4))
+		best := match{s: s, est: highScore}
+		improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1)
+		improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1)
+		improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1)
+		improve(&best, candidateS.prev-e.cur, s, uint32(cv), -1)

 		if canRepeat && best.length < goodEnough {
-			cv32 := uint32(cv >> 8)
-			spp := s + 1
-			m1 := matchAt(spp-offset1, spp, cv32, 1)
-			m2 := matchAt(spp-offset2, spp, cv32, 2)
-			m3 := matchAt(spp-offset3, spp, cv32, 3)
-			best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
-			if best.length > 0 {
-				cv32 = uint32(cv >> 24)
-				spp += 2
-				m1 := matchAt(spp-offset1, spp, cv32, 1)
-				m2 := matchAt(spp-offset2, spp, cv32, 2)
-				m3 := matchAt(spp-offset3, spp, cv32, 3)
-				best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
+			if s == nextEmit {
+				// Check repeats straight after a match.
+				improve(&best, s-offset2, s, uint32(cv), 1|4)
+				improve(&best, s-offset3, s, uint32(cv), 2|4)
+				if offset1 > 1 {
+					improve(&best, s-(offset1-1), s, uint32(cv), 3|4)
+				}
+			}
+
+			// If either no match or a non-repeat match, check at + 1
+			if best.rep <= 0 {
+				cv32 := uint32(cv >> 8)
+				spp := s + 1
+				improve(&best, spp-offset1, spp, cv32, 1)
+				improve(&best, spp-offset2, spp, cv32, 2)
+				improve(&best, spp-offset3, spp, cv32, 3)
+				if best.rep < 0 {
+					cv32 = uint32(cv >> 24)
+					spp += 2
+					improve(&best, spp-offset1, spp, cv32, 1)
+					improve(&best, spp-offset2, spp, cv32, 2)
+					improve(&best, spp-offset3, spp, cv32, 3)
+				}
 			}
 		}
 		// Load next and check...
@@ -250,47 +290,45 @@ encodeLoop:
 				if s >= sLimit {
 					break encodeLoop
 				}
-				cv = load6432(src, s)
 				continue
 			}

-			s++
 			candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)]
-			cv = load6432(src, s)
-			cv2 := load6432(src, s+1)
+			cv = load6432(src, s+1)
+			cv2 := load6432(src, s+2)
 			candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
 			candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]

 			// Short at s+1
-			m1 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)
+			improve(&best, candidateS.offset-e.cur, s+1, uint32(cv), -1)
 			// Long at s+1, s+2
-			m2 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)
-			m3 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)
-			m4 := matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1)
-			m5 := matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1)
-			best = bestOf(bestOf(bestOf(best, &m1), &m2), bestOf(bestOf(&m3, &m4), &m5))
+			improve(&best, candidateL.offset-e.cur, s+1, uint32(cv), -1)
+			improve(&best, candidateL.prev-e.cur, s+1, uint32(cv), -1)
+			improve(&best, candidateL2.offset-e.cur, s+2, uint32(cv2), -1)
+			improve(&best, candidateL2.prev-e.cur, s+2, uint32(cv2), -1)
 			if false {
 				// Short at s+3.
 				// Too often worse...
-				m := matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)
-				best = bestOf(best, &m)
+				improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+3, uint32(cv2>>8), -1)
 			}
-			// See if we can find a better match by checking where the current best ends.
-			// Use that offset to see if we can find a better full match.
-			if sAt := best.s + best.length; sAt < sLimit {
-				nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
-				candidateEnd := e.longTable[nextHashL]
-				// Start check at a fixed offset to allow for a few mismatches.
-				// For this compression level 2 yields the best results.
-				const skipBeginning = 2
-				if pos := candidateEnd.offset - e.cur - best.length + skipBeginning; pos >= 0 {
-					m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
-					bestEnd := bestOf(best, &m)
-					if pos := candidateEnd.prev - e.cur - best.length + skipBeginning; pos >= 0 {
-						m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
-						bestEnd = bestOf(bestEnd, &m)
+
+			// Start check at a fixed offset to allow for a few mismatches.
+			// For this compression level 2 yields the best results.
+			// We cannot do this if we have already indexed this position.
+			const skipBeginning = 2
+			if best.s > s-skipBeginning {
+				// See if we can find a better match by checking where the current best ends.
+				// Use that offset to see if we can find a better full match.
+				if sAt := best.s + best.length; sAt < sLimit {
+					nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
+					candidateEnd := e.longTable[nextHashL]
+
+					if off := candidateEnd.offset - e.cur - best.length + skipBeginning; off >= 0 {
+						improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
+						if off := candidateEnd.prev - e.cur - best.length + skipBeginning; off >= 0 {
+							improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
+						}
 					}
-					best = bestEnd
 				}
 			}
 		}
@@ -303,51 +341,34 @@ encodeLoop:

 		// We have a match, we can store the forward value
 		if best.rep > 0 {
-			s = best.s
 			var seq seq
 			seq.matchLen = uint32(best.length - zstdMinMatch)
-
-			// We might be able to match backwards.
-			// Extend as long as we can.
-			start := best.s
-			// We end the search early, so we don't risk 0 literals
-			// and have to do special offset treatment.
-			startLimit := nextEmit + 1
-
-			tMin := s - e.maxMatchOff
-			if tMin < 0 {
-				tMin = 0
+			if debugAsserts && s <= nextEmit {
+				panic("s <= nextEmit")
 			}
-			repIndex := best.offset
-			for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
-				repIndex--
-				start--
-				seq.matchLen++
-			}
-			addLiterals(&seq, start)
+			addLiterals(&seq, best.s)

-			// rep 0
-			seq.offset = uint32(best.rep)
+			// Repeat. If bit 4 is set, this is a non-lit repeat.
+			seq.offset = uint32(best.rep & 3)
 			if debugSequences {
 				println("repeat sequence", seq, "next s:", s)
 			}
 			blk.sequences = append(blk.sequences, seq)

-			// Index match start+1 (long) -> s - 1
-			index0 := s
+			// Index old s + 1 -> s - 1
+			index0 := s + 1
 			s = best.s + best.length

 			nextEmit = s
 			if s >= sLimit {
 				if debugEncoder {
 					println("repeat ended", s, best.length)
-
 				}
 				break encodeLoop
 			}
 			// Index skipped...
 			off := index0 + e.cur
-			for index0 < s-1 {
+			for index0 < s {
 				cv0 := load6432(src, index0)
 				h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
 				h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@@ -357,17 +378,19 @@ encodeLoop:
 				index0++
 			}
 			switch best.rep {
-			case 2:
+			case 2, 4 | 1:
 				offset1, offset2 = offset2, offset1
-			case 3:
+			case 3, 4 | 2:
 				offset1, offset2, offset3 = offset3, offset1, offset2
+			case 4 | 3:
+				offset1, offset2, offset3 = offset1-1, offset1, offset2
 			}
-			cv = load6432(src, s)
 			continue
 		}

 		// A 4-byte match has been found. Update recent offsets.
 		// We'll later see if more than 4 bytes.
+		index0 := s + 1
 		s = best.s
 		t := best.offset
 		offset1, offset2, offset3 = s-t, offset1, offset2
@@ -380,22 +403,9 @@ encodeLoop:
 			panic("invalid offset")
 		}

-		// Extend the n-byte match as long as possible.
-		l := best.length
-
-		// Extend backwards
-		tMin := s - e.maxMatchOff
-		if tMin < 0 {
-			tMin = 0
-		}
-		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
-			s--
-			t--
-			l++
-		}
-
 		// Write our sequence
 		var seq seq
+		l := best.length
 		seq.litLen = uint32(s - nextEmit)
 		seq.matchLen = uint32(l - zstdMinMatch)
 		if seq.litLen > 0 {
@@ -412,10 +422,8 @@ encodeLoop:
 			break encodeLoop
 		}

-		// Index match start+1 (long) -> s - 1
-		index0 := s - l + 1
-		// every entry
-		for index0 < s-1 {
+		// Index old s + 1 -> s - 1
+		for index0 < s {
 			cv0 := load6432(src, index0)
 			h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
 			h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@@ -424,50 +432,6 @@ encodeLoop:
 			e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
 			index0++
 		}
-
-		cv = load6432(src, s)
-		if !canRepeat {
-			continue
-		}
-
-		// Check offset 2
-		for {
-			o2 := s - offset2
-			if load3232(src, o2) != uint32(cv) {
-				// Do regular search
-				break
-			}
-
-			// Store this, since we have it.
-			nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
-			nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
-
-			// We have at least 4 byte match.
-			// No need to check backwards. We come straight from a match
-			l := 4 + e.matchlen(s+4, o2+4, src)
-
-			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
-			e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: e.table[nextHashS].offset}
-			seq.matchLen = uint32(l) - zstdMinMatch
-			seq.litLen = 0
-
-			// Since litlen is always 0, this is offset 1.
-			seq.offset = 1
-			s += l
-			nextEmit = s
-			if debugSequences {
-				println("sequence", seq, "next s:", s)
-			}
-			blk.sequences = append(blk.sequences, seq)
-
-			// Swap offset 1 and 2.
-			offset1, offset2 = offset2, offset1
-			if s >= sLimit {
-				// Finished
-				break encodeLoop
-			}
-			cv = load6432(src, s)
-		}
 	}

 	if int(nextEmit) < len(src) {
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@@ -1084,7 +1084,7 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
 			}
 		}
 		e.lastDictID = d.id
-		e.allDirty = true
+		allDirty = true
 	}
 	// Reset table to initial state
 	e.cur = e.maxMatchOff
--- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
@@ -133,8 +133,7 @@ encodeLoop:
 			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
 				// Consider history as well.
 				var seq seq
-				var length int32
-				length = 4 + e.matchlen(s+6, repIndex+4, src)
+				length := 4 + e.matchlen(s+6, repIndex+4, src)
 				seq.matchLen = uint32(length - zstdMinMatch)

 				// We might be able to match backwards.
@@ -645,8 +644,7 @@ encodeLoop:
 			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
 				// Consider history as well.
 				var seq seq
-				var length int32
-				length = 4 + e.matchlen(s+6, repIndex+4, src)
+				length := 4 + e.matchlen(s+6, repIndex+4, src)

 				seq.matchLen = uint32(length - zstdMinMatch)

@@ -831,13 +829,12 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
 		}
 		if true {
 			end := e.maxMatchOff + int32(len(d.content)) - 8
-			for i := e.maxMatchOff; i < end; i += 3 {
+			for i := e.maxMatchOff; i < end; i += 2 {
 				const hashLog = tableBits

 				cv := load6432(d.content, i-e.maxMatchOff)
-				nextHash := hashLen(cv, hashLog, tableFastHashLen)      // 0 -> 5
-				nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen)  // 1 -> 6
-				nextHash2 := hashLen(cv>>16, hashLog, tableFastHashLen) // 2 -> 7
+				nextHash := hashLen(cv, hashLog, tableFastHashLen)     // 0 -> 6
+				nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 7
 				e.dictTable[nextHash] = tableEntry{
 					val:    uint32(cv),
 					offset: i,
@@ -846,10 +843,6 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
 					val:    uint32(cv >> 8),
 					offset: i + 1,
 				}
-				e.dictTable[nextHash2] = tableEntry{
-					val:    uint32(cv >> 16),
-					offset: i + 2,
-				}
 			}
 		}
 		e.lastDictID = d.id
--- a/vendor/github.com/klauspost/compress/zstd/encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@@ -277,23 +277,9 @@ func (e *Encoder) nextBlock(final bool) error {
 			s.eofWritten = true
 		}

-		err := errIncompressible
-		// If we got the exact same number of literals as input,
-		// assume the literals cannot be compressed.
-		if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
-			err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
-		}
-		switch err {
-		case errIncompressible:
-			if debugEncoder {
-				println("Storing incompressible block as raw")
-			}
-			blk.encodeRaw(src)
-			// In fast mode, we do not transfer offsets, so we don't have to deal with changing the.
-		case nil:
-		default:
-			s.err = err
-			return err
+		s.err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
+		if s.err != nil {
+			return s.err
 		}
 		_, s.err = s.w.Write(blk.output)
 		s.nWritten += int64(len(blk.output))
@@ -343,22 +329,8 @@ func (e *Encoder) nextBlock(final bool) error {
 				}
 				s.wWg.Done()
 			}()
-			err := errIncompressible
-			// If we got the exact same number of literals as input,
-			// assume the literals cannot be compressed.
-			if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
-				err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
-			}
-			switch err {
-			case errIncompressible:
-				if debugEncoder {
-					println("Storing incompressible block as raw")
-				}
-				blk.encodeRaw(src)
-				// In fast mode, we do not transfer offsets, so we don't have to deal with changing the.
-			case nil:
-			default:
-				s.writeErr = err
+			s.writeErr = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
+			if s.writeErr != nil {
 				return
 			}
 			_, s.writeErr = s.w.Write(blk.output)
@@ -568,25 +540,15 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {

 		// If we got the exact same number of literals as input,
 		// assume the literals cannot be compressed.
-		err := errIncompressible
 		oldout := blk.output
-		if len(blk.literals) != len(src) || len(src) != e.o.blockSize {
-			// Output directly to dst
-			blk.output = dst
-			err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
-		}
+		// Output directly to dst
+		blk.output = dst

-		switch err {
-		case errIncompressible:
-			if debugEncoder {
-				println("Storing incompressible block as raw")
-			}
-			dst = blk.encodeRawTo(dst, src)
-		case nil:
-			dst = blk.output
-		default:
+		err := blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
+		if err != nil {
 			panic(err)
 		}
+		dst = blk.output
 		blk.output = oldout
 	} else {
 		enc.Reset(e.o.dict, false)
@@ -605,25 +567,11 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 			if len(src) == 0 {
 				blk.last = true
 			}
-			err := errIncompressible
-			// If we got the exact same number of literals as input,
-			// assume the literals cannot be compressed.
-			if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize {
-				err = blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy)
-			}
-
-			switch err {
-			case errIncompressible:
-				if debugEncoder {
-					println("Storing incompressible block as raw")
-				}
-				dst = blk.encodeRawTo(dst, todo)
-				blk.popOffsets()
-			case nil:
-				dst = append(dst, blk.output...)
-			default:
+			err := blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy)
+			if err != nil {
 				panic(err)
 			}
+			dst = append(dst, blk.output...)
 			blk.reset(nil)
 		}
 	}
--- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
@@ -39,7 +39,7 @@ func (o *encoderOptions) setDefault() {
 		blockSize:     maxCompressedBlockSize,
 		windowSize:    8 << 20,
 		level:         SpeedDefault,
-		allLitEntropy: true,
+		allLitEntropy: false,
 		lowMem:        false,
 	}
 }
@@ -129,7 +129,7 @@ func WithEncoderPadding(n int) EOption {
 		}
 		// No need to waste our time.
 		if n == 1 {
-			o.pad = 0
+			n = 0
 		}
 		if n > 1<<30 {
 			return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ")
@@ -238,7 +238,7 @@ func WithEncoderLevel(l EncoderLevel) EOption {
 			}
 		}
 		if !o.customALEntropy {
-			o.allLitEntropy = l > SpeedFastest
+			o.allLitEntropy = l > SpeedDefault
 		}

 		return nil
--- a/vendor/github.com/klauspost/compress/zstd/framedec.go
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@@ -73,20 +73,20 @@ func (d *frameDec) reset(br byteBuffer) error {
 		switch err {
 		case io.EOF, io.ErrUnexpectedEOF:
 			return io.EOF
-		default:
-			return err
 		case nil:
 			signature[0] = b[0]
+		default:
+			return err
 		}
 		// Read the rest, don't allow io.ErrUnexpectedEOF
 		b, err = br.readSmall(3)
 		switch err {
 		case io.EOF:
 			return io.EOF
-		default:
-			return err
 		case nil:
 			copy(signature[1:], b)
+		default:
+			return err
 		}

 		if string(signature[1:4]) != skippableFrameMagic || signature[0]&0xf0 != 0x50 {
@@ -293,13 +293,9 @@ func (d *frameDec) next(block *blockDec) error {
 	return nil
 }

-// checkCRC will check the checksum if the frame has one.
+// checkCRC will check the checksum, assuming the frame has one.
 // Will return ErrCRCMismatch if crc check failed, otherwise nil.
 func (d *frameDec) checkCRC() error {
-	if !d.HasCheckSum {
-		return nil
-	}
-
 	// We can overwrite upper tmp now
 	buf, err := d.rawInput.readSmall(4)
 	if err != nil {
@@ -307,10 +303,6 @@ func (d *frameDec) checkCRC() error {
 		return err
 	}

-	if d.o.ignoreChecksum {
-		return nil
-	}
-
 	want := binary.LittleEndian.Uint32(buf[:4])
 	got := uint32(d.crc.Sum64())

@@ -326,17 +318,13 @@ func (d *frameDec) checkCRC() error {
 	return nil
 }

-// consumeCRC reads the checksum data if the frame has one.
+// consumeCRC skips over the checksum, assuming the frame has one.
 func (d *frameDec) consumeCRC() error {
-	if d.HasCheckSum {
-		_, err := d.rawInput.readSmall(4)
-		if err != nil {
-			println("CRC missing?", err)
-			return err
-		}
+	_, err := d.rawInput.readSmall(4)
+	if err != nil {
+		println("CRC missing?", err)
 	}
-
-	return nil
+	return err
 }

 // runDecoder will run the decoder for the remainder of the frame.
@@ -415,15 +403,8 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
 			if d.o.ignoreChecksum {
 				err = d.consumeCRC()
 			} else {
-				var n int
-				n, err = d.crc.Write(dst[crcStart:])
-				if err == nil {
-					if n != len(dst)-crcStart {
-						err = io.ErrShortWrite
-					} else {
-						err = d.checkCRC()
-					}
-				}
+				d.crc.Write(dst[crcStart:])
+				err = d.checkCRC()
 			}
 		}
 	}
--- a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go
+++ b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go
@@ -0,0 +1,16 @@
+//go:build amd64 && !appengine && !noasm && gc
+// +build amd64,!appengine,!noasm,gc
+
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package zstd
+
+// matchLen returns how many bytes match in a and b
+//
+// It assumes that:
+//
+//	len(a) <= len(b) and len(a) > 0
+//
+//go:noescape
+func matchLen(a []byte, b []byte) int
--- a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s
+++ b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s
@@ -0,0 +1,68 @@
+// Copied from S2 implementation.
+
+//go:build !appengine && !noasm && gc && !noasm
+
+#include "textflag.h"
+
+// func matchLen(a []byte, b []byte) int
+// Requires: BMI
+TEXT ·matchLen(SB), NOSPLIT, $0-56
+	MOVQ a_base+0(FP), AX
+	MOVQ b_base+24(FP), CX
+	MOVQ a_len+8(FP), DX
+
+	// matchLen
+	XORL SI, SI
+	CMPL DX, $0x08
+	JB   matchlen_match4_standalone
+
+matchlen_loopback_standalone:
+	MOVQ  (AX)(SI*1), BX
+	XORQ  (CX)(SI*1), BX
+	TESTQ BX, BX
+	JZ    matchlen_loop_standalone
+
+#ifdef GOAMD64_v3
+	TZCNTQ BX, BX
+#else
+	BSFQ BX, BX
+#endif
+	SARQ $0x03, BX
+	LEAL (SI)(BX*1), SI
+	JMP  gen_match_len_end
+
+matchlen_loop_standalone:
+	LEAL -8(DX), DX
+	LEAL 8(SI), SI
+	CMPL DX, $0x08
+	JAE  matchlen_loopback_standalone
+
+matchlen_match4_standalone:
+	CMPL DX, $0x04
+	JB   matchlen_match2_standalone
+	MOVL (AX)(SI*1), BX
+	CMPL (CX)(SI*1), BX
+	JNE  matchlen_match2_standalone
+	LEAL -4(DX), DX
+	LEAL 4(SI), SI
+
+matchlen_match2_standalone:
+	CMPL DX, $0x02
+	JB   matchlen_match1_standalone
+	MOVW (AX)(SI*1), BX
+	CMPW (CX)(SI*1), BX
+	JNE  matchlen_match1_standalone
+	LEAL -2(DX), DX
+	LEAL 2(SI), SI
+
+matchlen_match1_standalone:
+	CMPL DX, $0x01
+	JB   gen_match_len_end
+	MOVB (AX)(SI*1), BL
+	CMPB (CX)(SI*1), BL
+	JNE  gen_match_len_end
+	INCL SI
+
+gen_match_len_end:
+	MOVQ SI, ret+48(FP)
+	RET
--- a/vendor/github.com/klauspost/compress/zstd/matchlen_generic.go
+++ b/vendor/github.com/klauspost/compress/zstd/matchlen_generic.go
@@ -0,0 +1,33 @@
+//go:build !amd64 || appengine || !gc || noasm
+// +build !amd64 appengine !gc noasm
+
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package zstd
+
+import (
+	"encoding/binary"
+	"math/bits"
+)
+
+// matchLen returns the maximum common prefix length of a and b.
+// a must be the shortest of the two.
+func matchLen(a, b []byte) (n int) {
+	for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
+		diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
+		if diff != 0 {
+			return n + bits.TrailingZeros64(diff)>>3
+		}
+		n += 8
+	}
+
+	for i := range a {
+		if a[i] != b[i] {
+			break
+		}
+		n++
+	}
+	return n
+
+}
--- a/vendor/github.com/klauspost/compress/zstd/seqdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go
@@ -236,9 +236,12 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
 		maxBlockSize = s.windowSize
 	}

+	if debugDecoder {
+		println("decodeSync: decoding", seqs, "sequences", br.remain(), "bits remain on stream")
+	}
 	for i := seqs - 1; i >= 0; i-- {
 		if br.overread() {
-			printf("reading sequence %d, exceeded available data\n", seqs-i)
+			printf("reading sequence %d, exceeded available data. Overread by %d\n", seqs-i, -br.remain())
 			return io.ErrUnexpectedEOF
 		}
 		var ll, mo, ml int
@@ -314,9 +317,6 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
 		}
 		size := ll + ml + len(out)
 		if size-startSize > maxBlockSize {
-			if size-startSize == 424242 {
-				panic("here")
-			}
 			return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
 		}
 		if size > cap(out) {
@@ -427,8 +427,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
 		}
 	}

-	// Check if space for literals
-	if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize {
+	if size := len(s.literals) + len(out) - startSize; size > maxBlockSize {
 		return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
 	}

--- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
@@ -5,6 +5,7 @@ package zstd

 import (
 	"fmt"
+	"io"

 	"github.com/klauspost/compress/internal/cpuinfo"
 )
@@ -134,6 +135,9 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
 		return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available",
 			ctx.ll, ctx.litRemain+ctx.ll)

+	case errorOverread:
+		return true, io.ErrUnexpectedEOF
+
 	case errorNotEnoughSpace:
 		size := ctx.outPosition + ctx.ll + ctx.ml
 		if debugDecoder {
@@ -148,7 +152,6 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
 	s.seqSize += ctx.litRemain
 	if s.seqSize > maxBlockSize {
 		return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
-
 	}
 	err := br.close()
 	if err != nil {
@@ -203,6 +206,9 @@ const errorNotEnoughLiterals = 4
 // error reported when capacity of `out` is too small
 const errorNotEnoughSpace = 5

+// error reported when bits are overread.
+const errorOverread = 6
+
 // sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
 //
 // Please refer to seqdec_generic.go for the reference implementation.
@@ -248,6 +254,10 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
 		litRemain: len(s.literals),
 	}

+	if debugDecoder {
+		println("decode: decoding", len(seqs), "sequences", br.remain(), "bits remain on stream")
+	}
+
 	s.seqSize = 0
 	lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56
 	var errCode int
@@ -278,6 +288,8 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
 		case errorNotEnoughLiterals:
 			ll := ctx.seqs[i].ll
 			return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll)
+		case errorOverread:
+			return io.ErrUnexpectedEOF
 		}

 		return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode)
@@ -292,6 +304,9 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
 	if s.seqSize > maxBlockSize {
 		return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
 	}
+	if debugDecoder {
+		println("decode: ", br.remain(), "bits remain on stream. code:", errCode)
+	}
 	err := br.close()
 	if err != nil {
 		printf("Closing sequences: %v, %+v\n", err, *br)
--- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
@@ -38,7 +38,7 @@ sequenceDecs_decode_amd64_main_loop:

 sequenceDecs_decode_amd64_fill_byte_by_byte:
 	CMPQ    SI, $0x00
-	JLE     sequenceDecs_decode_amd64_fill_end
+	JLE     sequenceDecs_decode_amd64_fill_check_overread
 	CMPQ    BX, $0x07
 	JLE     sequenceDecs_decode_amd64_fill_end
 	SHLQ    $0x08, DX
@@ -49,6 +49,10 @@ sequenceDecs_decode_amd64_fill_byte_by_byte:
 	ORQ     AX, DX
 	JMP     sequenceDecs_decode_amd64_fill_byte_by_byte

+sequenceDecs_decode_amd64_fill_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
 sequenceDecs_decode_amd64_fill_end:
 	// Update offset
 	MOVQ  R9, AX
@@ -105,7 +109,7 @@ sequenceDecs_decode_amd64_ml_update_zero:

 sequenceDecs_decode_amd64_fill_2_byte_by_byte:
 	CMPQ    SI, $0x00
-	JLE     sequenceDecs_decode_amd64_fill_2_end
+	JLE     sequenceDecs_decode_amd64_fill_2_check_overread
 	CMPQ    BX, $0x07
 	JLE     sequenceDecs_decode_amd64_fill_2_end
 	SHLQ    $0x08, DX
@@ -116,6 +120,10 @@ sequenceDecs_decode_amd64_fill_2_byte_by_byte:
 	ORQ     AX, DX
 	JMP     sequenceDecs_decode_amd64_fill_2_byte_by_byte

+sequenceDecs_decode_amd64_fill_2_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
 sequenceDecs_decode_amd64_fill_2_end:
 	// Update literal length
 	MOVQ  DI, AX
@@ -320,6 +328,11 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET

+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
 // func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 // Requires: CMOV
 TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
@@ -356,7 +369,7 @@ sequenceDecs_decode_56_amd64_main_loop:

 sequenceDecs_decode_56_amd64_fill_byte_by_byte:
 	CMPQ    SI, $0x00
-	JLE     sequenceDecs_decode_56_amd64_fill_end
+	JLE     sequenceDecs_decode_56_amd64_fill_check_overread
 	CMPQ    BX, $0x07
 	JLE     sequenceDecs_decode_56_amd64_fill_end
 	SHLQ    $0x08, DX
@@ -367,6 +380,10 @@ sequenceDecs_decode_56_amd64_fill_byte_by_byte:
 	ORQ     AX, DX
 	JMP     sequenceDecs_decode_56_amd64_fill_byte_by_byte

+sequenceDecs_decode_56_amd64_fill_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
 sequenceDecs_decode_56_amd64_fill_end:
 	// Update offset
 	MOVQ  R9, AX
@@ -613,6 +630,11 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET

+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
 // func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 // Requires: BMI, BMI2, CMOV
 TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
@@ -649,7 +671,7 @@ sequenceDecs_decode_bmi2_main_loop:

 sequenceDecs_decode_bmi2_fill_byte_by_byte:
 	CMPQ    BX, $0x00
-	JLE     sequenceDecs_decode_bmi2_fill_end
+	JLE     sequenceDecs_decode_bmi2_fill_check_overread
 	CMPQ    DX, $0x07
 	JLE     sequenceDecs_decode_bmi2_fill_end
 	SHLQ    $0x08, AX
@@ -660,6 +682,10 @@ sequenceDecs_decode_bmi2_fill_byte_by_byte:
 	ORQ     CX, AX
 	JMP     sequenceDecs_decode_bmi2_fill_byte_by_byte

+sequenceDecs_decode_bmi2_fill_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
 sequenceDecs_decode_bmi2_fill_end:
 	// Update offset
 	MOVQ   $0x00000808, CX
@@ -700,7 +726,7 @@ sequenceDecs_decode_bmi2_fill_end:

 sequenceDecs_decode_bmi2_fill_2_byte_by_byte:
 	CMPQ    BX, $0x00
-	JLE     sequenceDecs_decode_bmi2_fill_2_end
+	JLE     sequenceDecs_decode_bmi2_fill_2_check_overread
 	CMPQ    DX, $0x07
 	JLE     sequenceDecs_decode_bmi2_fill_2_end
 	SHLQ    $0x08, AX
@@ -711,6 +737,10 @@ sequenceDecs_decode_bmi2_fill_2_byte_by_byte:
 	ORQ     CX, AX
 	JMP     sequenceDecs_decode_bmi2_fill_2_byte_by_byte

+sequenceDecs_decode_bmi2_fill_2_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
 sequenceDecs_decode_bmi2_fill_2_end:
 	// Update literal length
 	MOVQ   $0x00000808, CX
@@ -889,6 +919,11 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET

+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
 // func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 // Requires: BMI, BMI2, CMOV
 TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
@@ -925,7 +960,7 @@ sequenceDecs_decode_56_bmi2_main_loop:

 sequenceDecs_decode_56_bmi2_fill_byte_by_byte:
 	CMPQ    BX, $0x00
-	JLE     sequenceDecs_decode_56_bmi2_fill_end
+	JLE     sequenceDecs_decode_56_bmi2_fill_check_overread
 	CMPQ    DX, $0x07
 	JLE     sequenceDecs_decode_56_bmi2_fill_end
 	SHLQ    $0x08, AX
@@ -936,6 +971,10 @@ sequenceDecs_decode_56_bmi2_fill_byte_by_byte:
 	ORQ     CX, AX
 	JMP     sequenceDecs_decode_56_bmi2_fill_byte_by_byte

+sequenceDecs_decode_56_bmi2_fill_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
 sequenceDecs_decode_56_bmi2_fill_end:
 	// Update offset
 	MOVQ   $0x00000808, CX
@@ -1140,6 +1179,11 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET

+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
 // func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
 // Requires: SSE
 TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9
@@ -1804,7 +1848,7 @@ sequenceDecs_decodeSync_amd64_main_loop:

 sequenceDecs_decodeSync_amd64_fill_byte_by_byte:
 	CMPQ    SI, $0x00
-	JLE     sequenceDecs_decodeSync_amd64_fill_end
+	JLE     sequenceDecs_decodeSync_amd64_fill_check_overread
 	CMPQ    BX, $0x07
 	JLE     sequenceDecs_decodeSync_amd64_fill_end
 	SHLQ    $0x08, DX
@@ -1815,6 +1859,10 @@ sequenceDecs_decodeSync_amd64_fill_byte_by_byte:
 	ORQ     AX, DX
 	JMP     sequenceDecs_decodeSync_amd64_fill_byte_by_byte

+sequenceDecs_decodeSync_amd64_fill_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
 sequenceDecs_decodeSync_amd64_fill_end:
 	// Update offset
 	MOVQ  R9, AX
@@ -1871,7 +1919,7 @@ sequenceDecs_decodeSync_amd64_ml_update_zero:

 sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte:
 	CMPQ    SI, $0x00
-	JLE     sequenceDecs_decodeSync_amd64_fill_2_end
+	JLE     sequenceDecs_decodeSync_amd64_fill_2_check_overread
 	CMPQ    BX, $0x07
 	JLE     sequenceDecs_decodeSync_amd64_fill_2_end
 	SHLQ    $0x08, DX
@@ -1882,6 +1930,10 @@ sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte:
 	ORQ     AX, DX
 	JMP     sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte

+sequenceDecs_decodeSync_amd64_fill_2_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
 sequenceDecs_decodeSync_amd64_fill_2_end:
 	// Update literal length
 	MOVQ  DI, AX
@@ -2291,6 +2343,11 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET

+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
 	// Return with not enough output space error
 error_not_enough_space:
 	MOVQ ctx+16(FP), AX
@@ -2356,7 +2413,7 @@ sequenceDecs_decodeSync_bmi2_main_loop:

 sequenceDecs_decodeSync_bmi2_fill_byte_by_byte:
 	CMPQ    BX, $0x00
-	JLE     sequenceDecs_decodeSync_bmi2_fill_end
+	JLE     sequenceDecs_decodeSync_bmi2_fill_check_overread
 	CMPQ    DX, $0x07
 	JLE     sequenceDecs_decodeSync_bmi2_fill_end
 	SHLQ    $0x08, AX
@@ -2367,6 +2424,10 @@ sequenceDecs_decodeSync_bmi2_fill_byte_by_byte:
 	ORQ     CX, AX
 	JMP     sequenceDecs_decodeSync_bmi2_fill_byte_by_byte

+sequenceDecs_decodeSync_bmi2_fill_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
 sequenceDecs_decodeSync_bmi2_fill_end:
 	// Update offset
 	MOVQ   $0x00000808, CX
@@ -2407,7 +2468,7 @@ sequenceDecs_decodeSync_bmi2_fill_end:

 sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte:
 	CMPQ    BX, $0x00
-	JLE     sequenceDecs_decodeSync_bmi2_fill_2_end
+	JLE     sequenceDecs_decodeSync_bmi2_fill_2_check_overread
 	CMPQ    DX, $0x07
 	JLE     sequenceDecs_decodeSync_bmi2_fill_2_end
 	SHLQ    $0x08, AX
@@ -2418,6 +2479,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte:
 	ORQ     CX, AX
 	JMP     sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte

+sequenceDecs_decodeSync_bmi2_fill_2_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
 sequenceDecs_decodeSync_bmi2_fill_2_end:
 	// Update literal length
 	MOVQ   $0x00000808, CX
@@ -2801,6 +2866,11 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET

+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
 	// Return with not enough output space error
 error_not_enough_space:
 	MOVQ ctx+16(FP), AX
@@ -2866,7 +2936,7 @@ sequenceDecs_decodeSync_safe_amd64_main_loop:

 sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte:
 	CMPQ    SI, $0x00
-	JLE     sequenceDecs_decodeSync_safe_amd64_fill_end
+	JLE     sequenceDecs_decodeSync_safe_amd64_fill_check_overread
 	CMPQ    BX, $0x07
 	JLE     sequenceDecs_decodeSync_safe_amd64_fill_end
 	SHLQ    $0x08, DX
@@ -2877,6 +2947,10 @@ sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte:
 	ORQ     AX, DX
 	JMP     sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte

+sequenceDecs_decodeSync_safe_amd64_fill_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
 sequenceDecs_decodeSync_safe_amd64_fill_end:
 	// Update offset
 	MOVQ  R9, AX
@@ -2933,7 +3007,7 @@ sequenceDecs_decodeSync_safe_amd64_ml_update_zero:

 sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte:
 	CMPQ    SI, $0x00
-	JLE     sequenceDecs_decodeSync_safe_amd64_fill_2_end
+	JLE     sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread
 	CMPQ    BX, $0x07
 	JLE     sequenceDecs_decodeSync_safe_amd64_fill_2_end
 	SHLQ    $0x08, DX
@@ -2944,6 +3018,10 @@ sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte:
 	ORQ     AX, DX
 	JMP     sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte

+sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread:
+	CMPQ BX, $0x40
+	JA   error_overread
+
 sequenceDecs_decodeSync_safe_amd64_fill_2_end:
 	// Update literal length
 	MOVQ  DI, AX
@@ -3455,6 +3533,11 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET

+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
 	// Return with not enough output space error
 error_not_enough_space:
 	MOVQ ctx+16(FP), AX
@@ -3520,7 +3603,7 @@ sequenceDecs_decodeSync_safe_bmi2_main_loop:

 sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte:
 	CMPQ    BX, $0x00
-	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_end
+	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_check_overread
 	CMPQ    DX, $0x07
 	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_end
 	SHLQ    $0x08, AX
@@ -3531,6 +3614,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte:
 	ORQ     CX, AX
 	JMP     sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte

+sequenceDecs_decodeSync_safe_bmi2_fill_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
 sequenceDecs_decodeSync_safe_bmi2_fill_end:
 	// Update offset
 	MOVQ   $0x00000808, CX
@@ -3571,7 +3658,7 @@ sequenceDecs_decodeSync_safe_bmi2_fill_end:

 sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte:
 	CMPQ    BX, $0x00
-	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_2_end
+	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread
 	CMPQ    DX, $0x07
 	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_2_end
 	SHLQ    $0x08, AX
@@ -3582,6 +3669,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte:
 	ORQ     CX, AX
 	JMP     sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte

+sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread:
+	CMPQ DX, $0x40
+	JA   error_overread
+
 sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
 	// Update literal length
 	MOVQ   $0x00000808, CX
@@ -4067,6 +4158,11 @@ error_not_enough_literals:
 	MOVQ $0x00000004, ret+24(FP)
 	RET

+	// Return with overread error
+error_overread:
+	MOVQ $0x00000006, ret+24(FP)
+	RET
+
 	// Return with not enough output space error
 error_not_enough_space:
 	MOVQ ctx+16(FP), AX
--- a/vendor/github.com/klauspost/compress/zstd/zstd.go
+++ b/vendor/github.com/klauspost/compress/zstd/zstd.go
@@ -9,7 +9,6 @@ import (
 	"errors"
 	"log"
 	"math"
-	"math/bits"
 )

 // enable debug printing
@@ -106,33 +105,12 @@ func printf(format string, a ...interface{}) {
 	}
 }

-// matchLen returns the maximum common prefix length of a and b.
-// a must be the shortest of the two.
-func matchLen(a, b []byte) (n int) {
-	for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
-		diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
-		if diff != 0 {
-			return n + bits.TrailingZeros64(diff)>>3
-		}
-		n += 8
-	}
-
-	for i := range a {
-		if a[i] != b[i] {
-			break
-		}
-		n++
-	}
-	return n
-
-}
-
 func load3232(b []byte, i int32) uint32 {
-	return binary.LittleEndian.Uint32(b[i:])
+	return binary.LittleEndian.Uint32(b[:len(b):len(b)][i:])
 }

 func load6432(b []byte, i int32) uint64 {
-	return binary.LittleEndian.Uint64(b[i:])
+	return binary.LittleEndian.Uint64(b[:len(b):len(b)][i:])
 }

 type byter interface {