Update dependencies and build to go1.22

Wim
2024-01-06 21:12:22 +01:00
parent 56e7bd01ca
commit c6efa7176e
1553 changed files with 3279516 additions and 1924371 deletions

View File

@@ -3,7 +3,7 @@
before:
hooks:
- ./gen.sh
- go install mvdan.cc/garble@v0.9.3
- go install mvdan.cc/garble@v0.10.1
builds:
-
@@ -92,16 +92,7 @@ builds:
archives:
-
id: s2-binaries
name_template: "s2-{{ .Os }}_{{ .Arch }}_{{ .Version }}"
replacements:
aix: AIX
darwin: OSX
linux: Linux
windows: Windows
386: i386
amd64: x86_64
freebsd: FreeBSD
netbsd: NetBSD
name_template: "s2-{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}"
format_overrides:
- goos: windows
format: zip
@@ -125,7 +116,7 @@ changelog:
nfpms:
-
file_name_template: "s2_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
file_name_template: "s2_package__{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}"
vendor: Klaus Post
homepage: https://github.com/klauspost/compress
maintainer: Klaus Post <klauspost@gmail.com>
@@ -134,8 +125,3 @@ nfpms:
formats:
- deb
- rpm
replacements:
darwin: Darwin
linux: Linux
freebsd: FreeBSD
amd64: x86_64

View File

@@ -16,6 +16,47 @@ This package provides various compression algorithms.
# changelog
* July 1st, 2023 - [v1.16.7](https://github.com/klauspost/compress/releases/tag/v1.16.7)
* zstd: Fix default level first dictionary encode https://github.com/klauspost/compress/pull/829
* s2: add GetBufferCapacity() method by @GiedriusS in https://github.com/klauspost/compress/pull/832
* June 13, 2023 - [v1.16.6](https://github.com/klauspost/compress/releases/tag/v1.16.6)
* zstd: correctly ignore WithEncoderPadding(1) by @ianlancetaylor in https://github.com/klauspost/compress/pull/806
* zstd: Add amd64 match length assembly https://github.com/klauspost/compress/pull/824
* gzhttp: Handle informational headers by @rtribotte in https://github.com/klauspost/compress/pull/815
* s2: Improve Better compression slightly https://github.com/klauspost/compress/pull/663
* Apr 16, 2023 - [v1.16.5](https://github.com/klauspost/compress/releases/tag/v1.16.5)
* zstd: readByte needs to use io.ReadFull by @jnoxon in https://github.com/klauspost/compress/pull/802
* gzip: Fix WriterTo after initial read https://github.com/klauspost/compress/pull/804
* Apr 5, 2023 - [v1.16.4](https://github.com/klauspost/compress/releases/tag/v1.16.4)
* zstd: Improve zstd best efficiency by @greatroar and @klauspost in https://github.com/klauspost/compress/pull/784
* zstd: Respect WithAllLitEntropyCompression https://github.com/klauspost/compress/pull/792
* zstd: Fix amd64 not always detecting corrupt data https://github.com/klauspost/compress/pull/785
* zstd: Various minor improvements by @greatroar in https://github.com/klauspost/compress/pull/788 https://github.com/klauspost/compress/pull/794 https://github.com/klauspost/compress/pull/795
* s2: Fix huge block overflow https://github.com/klauspost/compress/pull/779
* s2: Allow CustomEncoder fallback https://github.com/klauspost/compress/pull/780
* gzhttp: Support ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799
* Mar 13, 2023 - [v1.16.1](https://github.com/klauspost/compress/releases/tag/v1.16.1)
* zstd: Speed up + improve best encoder by @greatroar in https://github.com/klauspost/compress/pull/776
* gzhttp: Add optional [BREACH mitigation](https://github.com/klauspost/compress/tree/master/gzhttp#breach-mitigation). https://github.com/klauspost/compress/pull/762 https://github.com/klauspost/compress/pull/768 https://github.com/klauspost/compress/pull/769 https://github.com/klauspost/compress/pull/770 https://github.com/klauspost/compress/pull/767
* s2: Add Intel LZ4s converter https://github.com/klauspost/compress/pull/766
* zstd: Minor bug fixes https://github.com/klauspost/compress/pull/771 https://github.com/klauspost/compress/pull/772 https://github.com/klauspost/compress/pull/773
* huff0: Speed up compress1xDo by @greatroar in https://github.com/klauspost/compress/pull/774
* Feb 26, 2023 - [v1.16.0](https://github.com/klauspost/compress/releases/tag/v1.16.0)
* s2: Add [Dictionary](https://github.com/klauspost/compress/tree/master/s2#dictionaries) support. https://github.com/klauspost/compress/pull/685
* s2: Add Compression Size Estimate. https://github.com/klauspost/compress/pull/752
* s2: Add support for custom stream encoder. https://github.com/klauspost/compress/pull/755
* s2: Add LZ4 block converter. https://github.com/klauspost/compress/pull/748
* s2: Support io.ReaderAt in ReadSeeker. https://github.com/klauspost/compress/pull/747
* s2c/s2sx: Use concurrent decoding. https://github.com/klauspost/compress/pull/746
<details>
<summary>See changes to v1.15.x</summary>
* Jan 21st, 2023 (v1.15.15)
* deflate: Improve level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/739
* zstd: Add delta encoding support by @greatroar in https://github.com/klauspost/compress/pull/728
@@ -142,6 +183,8 @@ Stream decompression is now faster on asynchronous, since the goroutine allocati
While the release has been extensively tested, it is recommended to do testing when upgrading.
</details>
<details>
<summary>See changes to v1.14.x</summary>
@@ -600,6 +643,9 @@ Here are other packages of good quality and pure Go (no cgo wrappers or autoconv
* [github.com/pierrec/lz4](https://github.com/pierrec/lz4) - strong multithreaded LZ4 compression.
* [github.com/cosnicolaou/pbzip2](https://github.com/cosnicolaou/pbzip2) - multithreaded bzip2 decompression.
* [github.com/dsnet/compress](https://github.com/dsnet/compress) - brotli decompression, bzip2 writer.
* [github.com/ronanh/intcomp](https://github.com/ronanh/intcomp) - Integer compression.
* [github.com/spenczar/fpc](https://github.com/spenczar/fpc) - Float compression.
* [github.com/minio/zipindex](https://github.com/minio/zipindex) - External ZIP directory index.
# license

25 vendor/github.com/klauspost/compress/SECURITY.md generated vendored Normal file
View File

@@ -0,0 +1,25 @@
# Security Policy
## Supported Versions
Security updates are applied only to the latest release.
## Vulnerability Definition
A security vulnerability is a bug that with certain input triggers a crash or an infinite loop. Most calls will have varying execution time and only in rare cases will slow operation be considered a security vulnerability.
Corrupted output generally is not considered a security vulnerability, unless independent operations are able to affect each other. Note that not all functionality is re-entrant and safe to use concurrently.
Out-of-memory crashes only apply if the en/decoder uses an abnormal amount of memory, with appropriate options applied to limit maximum window size, concurrency, etc. However, if you are in doubt, you are welcome to file a security issue.
It is assumed that all callers are trusted, meaning internal data exposed through reflection or inspection of returned data structures is not considered a vulnerability.
Vulnerabilities resulting from compiler/assembler errors should be reported upstream. Depending on the severity this package may or may not implement a workaround.
## Reporting a Vulnerability
If you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released.
Please disclose it at [security advisory](https://github.com/klauspost/compress/security/advisories/new). If possible please provide a minimal reproducer. If the issue only applies to a single platform, it would be helpful to provide access to that.
This project is maintained by a team of volunteers on a reasonable-effort basis. As such, vulnerabilities will be disclosed on a best-effort basis.

View File

@@ -152,12 +152,11 @@ func (b *bitWriter) flushAlign() {
// close will write the alignment bit and write the final byte(s)
// to the output.
func (b *bitWriter) close() error {
func (b *bitWriter) close() {
// End mark
b.addBits16Clean(1, 1)
// flush until next byte.
b.flushAlign()
return nil
}
// reset and continue writing by appending to out.

View File

@@ -199,7 +199,8 @@ func (s *Scratch) compress(src []byte) error {
c2.flush(s.actualTableLog)
c1.flush(s.actualTableLog)
return s.bw.close()
s.bw.close()
return nil
}
// writeCount will write the normalized histogram count to header.

View File

@@ -260,7 +260,9 @@ func (s *Scratch) buildDtable() error {
// If the buffer is over-read an error is returned.
func (s *Scratch) decompress() error {
br := &s.bits
br.init(s.br.unread())
if err := br.init(s.br.unread()); err != nil {
return err
}
var s1, s2 decoder
// Initialize and decode first state and symbol.

View File

@@ -13,14 +13,6 @@ type bitWriter struct {
out []byte
}
// bitMask16 is bitmasks. Has extra to avoid bounds check.
var bitMask16 = [32]uint16{
0, 1, 3, 7, 0xF, 0x1F,
0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF,
0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
0xFFFF, 0xFFFF} /* up to 16 bits */
// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
@@ -60,6 +52,22 @@ func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) {
b.nBits += encA.nBits + encB.nBits
}
// encFourSymbols adds up to 32 bits from four symbols.
// It will not check if there is space for them,
// so the caller must ensure that b has been flushed recently.
func (b *bitWriter) encFourSymbols(encA, encB, encC, encD cTableEntry) {
bitsA := encA.nBits
bitsB := bitsA + encB.nBits
bitsC := bitsB + encC.nBits
bitsD := bitsC + encD.nBits
combined := uint64(encA.val) |
(uint64(encB.val) << (bitsA & 63)) |
(uint64(encC.val) << (bitsB & 63)) |
(uint64(encD.val) << (bitsC & 63))
b.bitContainer |= combined << (b.nBits & 63)
b.nBits += bitsD
}
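
The packing done by encFourSymbols above can be illustrated with a small standalone sketch (not part of the library); the `code` type and `packFour` function below are illustrative only:

```go
package example

// code mirrors a Huffman table entry: a code value and its length in bits.
type code struct {
	val   uint16
	nBits uint8
}

// packFour shows how four variable-length codes are combined into one 64-bit
// word, lowest code first, exactly as encFourSymbols does above.
func packFour(a, b, c, d code) (combined uint64, bits uint8) {
	bitsA := a.nBits
	bitsB := bitsA + b.nBits
	bitsC := bitsB + c.nBits
	combined = uint64(a.val) |
		uint64(b.val)<<(bitsA&63) |
		uint64(c.val)<<(bitsB&63) |
		uint64(d.val)<<(bitsC&63)
	return combined, bitsC + d.nBits
}
```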
// flush32 will flush out, so there are at least 32 bits available for writing.
func (b *bitWriter) flush32() {
if b.nBits < 32 {
@@ -86,10 +94,9 @@ func (b *bitWriter) flushAlign() {
// close will write the alignment bit and write the final byte(s)
// to the output.
func (b *bitWriter) close() error {
func (b *bitWriter) close() {
// End mark
b.addBits16Clean(1, 1)
// flush until next byte.
b.flushAlign()
return nil
}

View File

@@ -227,10 +227,10 @@ func EstimateSizes(in []byte, s *Scratch) (tableSz, dataSz, reuseSz int, err err
}
func (s *Scratch) compress1X(src []byte) ([]byte, error) {
return s.compress1xDo(s.Out, src)
return s.compress1xDo(s.Out, src), nil
}
func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
func (s *Scratch) compress1xDo(dst, src []byte) []byte {
var bw = bitWriter{out: dst}
// N is length divisible by 4.
@@ -248,8 +248,7 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
tmp := src[n : n+4]
// tmp should be len 4
bw.flush32()
bw.encTwoSymbols(cTable, tmp[3], tmp[2])
bw.encTwoSymbols(cTable, tmp[1], tmp[0])
bw.encFourSymbols(cTable[tmp[3]], cTable[tmp[2]], cTable[tmp[1]], cTable[tmp[0]])
}
} else {
for ; n >= 0; n -= 4 {
@@ -261,8 +260,8 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
bw.encTwoSymbols(cTable, tmp[1], tmp[0])
}
}
err := bw.close()
return bw.out, err
bw.close()
return bw.out
}
var sixZeros [6]byte
@@ -284,12 +283,8 @@ func (s *Scratch) compress4X(src []byte) ([]byte, error) {
}
src = src[len(toDo):]
var err error
idx := len(s.Out)
s.Out, err = s.compress1xDo(s.Out, toDo)
if err != nil {
return nil, err
}
s.Out = s.compress1xDo(s.Out, toDo)
if len(s.Out)-idx > math.MaxUint16 {
// We cannot store the size in the jump table
return nil, ErrIncompressible
@@ -316,7 +311,6 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
segmentSize := (len(src) + 3) / 4
var wg sync.WaitGroup
var errs [4]error
wg.Add(4)
for i := 0; i < 4; i++ {
toDo := src
@@ -327,15 +321,12 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
// Separate goroutine for each block.
go func(i int) {
s.tmpOut[i], errs[i] = s.compress1xDo(s.tmpOut[i][:0], toDo)
s.tmpOut[i] = s.compress1xDo(s.tmpOut[i][:0], toDo)
wg.Done()
}(i)
}
wg.Wait()
for i := 0; i < 4; i++ {
if errs[i] != nil {
return nil, errs[i]
}
o := s.tmpOut[i]
if len(o) > math.MaxUint16 {
// We cannot store the size in the jump table

View File

@@ -253,7 +253,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
switch d.actualTableLog {
case 8:
const shift = 8 - 8
const shift = 0
for br.off >= 4 {
br.fillFast()
v := dt[uint8(br.value>>(56+shift))]

View File

@@ -87,18 +87,6 @@ func emitCopy(dst []byte, offset, length int) int {
return i + 2
}
// extendMatch returns the largest k such that k <= len(src) and that
// src[i:i+k-j] and src[j:k] have the same contents.
//
// It assumes that:
//
// 0 <= i && i < j && j <= len(src)
func extendMatch(src []byte, i, j int) int {
for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
}
return j
}
func hash(u, shift uint32) uint32 {
return (u * 0x1e35a7bd) >> shift
}

File diff suppressed because it is too large

View File

@@ -106,6 +106,25 @@ func MakeDict(data []byte, searchStart []byte) *Dict {
return &d
}
// MakeDictManual will create a dictionary.
// 'data' must be at least MinDictSize and less than or equal to MaxDictSize.
// A manual first repeat index into data must be provided.
// It must be less than len(data)-8.
func MakeDictManual(data []byte, firstIdx uint16) *Dict {
if len(data) < MinDictSize || int(firstIdx) >= len(data)-8 || len(data) > MaxDictSize {
return nil
}
var d Dict
dict := data
d.dict = dict
if cap(d.dict) < len(d.dict)+16 {
d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
}
d.repeat = int(firstIdx)
return &d
}
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
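
A hedged usage sketch of the new MakeDictManual helper, paired with the Dict.Encode method documented just above; the sample data, the first repeat index of 0, and the function name are illustrative assumptions:

```go
package example

import "github.com/klauspost/compress/s2"

// encodeWithManualDict builds a dictionary from sample data with a manually
// chosen first repeat index and uses it to encode a block.
func encodeWithManualDict(samples, src []byte) []byte {
	// samples must be MinDictSize..MaxDictSize bytes; firstIdx must be < len(samples)-8.
	// 0 is used here purely for illustration.
	d := s2.MakeDictManual(samples, 0)
	if d == nil {
		return nil // invalid dictionary input
	}
	// Encode src as an S2 block using the dictionary.
	return d.Encode(nil, src)
}
```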

File diff suppressed because it is too large

View File

@@ -742,7 +742,6 @@ searchDict:
x := load64(src, s-2)
m2Hash := hash6(x, tableBits)
currHash := hash6(x>>8, tableBits)
candidate = int(table[currHash])
table[m2Hash] = uint32(s - 2)
table[currHash] = uint32(s - 1)
cv = load64(src, s)

View File

@@ -157,7 +157,6 @@ func encodeBlockBetterGo(dst, src []byte) (d int) {
index0 := base + 1
index1 := s - 2
cv = load64(src, s)
for index0 < index1 {
cv0 := load64(src, index0)
cv1 := load64(src, index1)
@@ -269,18 +268,21 @@ func encodeBlockBetterGo(dst, src []byte) (d int) {
lTable[hash7(cv0, lTableBits)] = uint32(index0)
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
// lTable could be postponed, but very minor difference.
lTable[hash7(cv1, lTableBits)] = uint32(index1)
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
index0 += 1
index1 -= 1
cv = load64(src, s)
// index every second long in between.
for index0 < index1 {
// Index large values sparsely in between.
// We do two starting from different offsets for speed.
index2 := (index0 + index1 + 1) >> 1
for index2 < index1 {
lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
index0 += 2
index1 -= 2
index2 += 2
}
}
@@ -459,12 +461,14 @@ func encodeBlockBetterSnappyGo(dst, src []byte) (d int) {
index1 -= 1
cv = load64(src, s)
// index every second long in between.
for index0 < index1 {
// Index large values sparsely in between.
// We do two starting from different offsets for speed.
index2 := (index0 + index1 + 1) >> 1
for index2 < index1 {
lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
index0 += 2
index1 -= 2
index2 += 2
}
}
@@ -599,7 +603,6 @@ searchDict:
if s >= sLimit {
break searchDict
}
cv = load64(src, s)
// Index in-between
index0 := base + 1
index1 := s - 2
@@ -865,12 +868,14 @@ searchDict:
index1 -= 1
cv = load64(src, s)
// index every second long in between.
for index0 < index1 {
// Index large values sparsely in between.
// We do two starting from different offsets for speed.
index2 := (index0 + index1 + 1) >> 1
for index2 < index1 {
lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
index0 += 2
index1 -= 2
index2 += 2
}
}
@@ -961,7 +966,6 @@ searchDict:
index0 := base + 1
index1 := s - 2
cv = load64(src, s)
for index0 < index1 {
cv0 := load64(src, index0)
cv1 := load64(src, index1)
@@ -1079,12 +1083,14 @@ searchDict:
index1 -= 1
cv = load64(src, s)
// index every second long in between.
for index0 < index1 {
// Index large values sparsely in between.
// We do two starting from different offsets for speed.
index2 := (index0 + index1 + 1) >> 1
for index2 < index1 {
lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
index0 += 2
index1 -= 2
index2 += 2
}
}

View File

@@ -717,3 +717,11 @@ func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
panic("cvtLZ4BlockSnappyAsm should be unreachable")
}
func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
panic("cvtLZ4sBlockAsm should be unreachable")
}
func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
panic("cvtLZ4sBlockSnappyAsm should be unreachable")
}

View File

@@ -212,7 +212,17 @@ func matchLen(a []byte, b []byte) int
//go:noescape
func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
// cvtLZ4Block converts an LZ4 block to S2
// cvtLZ4sBlock converts an LZ4s block to S2
//
//go:noescape
func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
// cvtLZ4Block converts an LZ4 block to Snappy
//
//go:noescape
func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
// cvtLZ4sBlock converts an LZ4s block to Snappy
//
//go:noescape
func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)

File diff suppressed because it is too large

View File

@@ -511,24 +511,22 @@ func IndexStream(r io.Reader) ([]byte, error) {
// JSON returns the index as JSON text.
func (i *Index) JSON() []byte {
type offset struct {
CompressedOffset int64 `json:"compressed"`
UncompressedOffset int64 `json:"uncompressed"`
}
x := struct {
TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown.
TotalCompressed int64 `json:"total_compressed"` // Total Compressed size if known. Will be -1 if unknown.
Offsets []struct {
CompressedOffset int64 `json:"compressed"`
UncompressedOffset int64 `json:"uncompressed"`
} `json:"offsets"`
EstBlockUncomp int64 `json:"est_block_uncompressed"`
TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown.
TotalCompressed int64 `json:"total_compressed"` // Total Compressed size if known. Will be -1 if unknown.
Offsets []offset `json:"offsets"`
EstBlockUncomp int64 `json:"est_block_uncompressed"`
}{
TotalUncompressed: i.TotalUncompressed,
TotalCompressed: i.TotalCompressed,
EstBlockUncomp: i.estBlockUncomp,
}
for _, v := range i.info {
x.Offsets = append(x.Offsets, struct {
CompressedOffset int64 `json:"compressed"`
UncompressedOffset int64 `json:"uncompressed"`
}{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset})
x.Offsets = append(x.Offsets, offset{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset})
}
b, _ := json.MarshalIndent(x, "", " ")
return b
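
A hedged sketch of how the JSON form above might be produced for an existing stream; the input file name is an assumption, and the Index.Load method is assumed from elsewhere in this file:

```go
package main

import (
	"fmt"
	"os"

	"github.com/klauspost/compress/s2"
)

func main() {
	f, err := os.Open("stream.s2") // assumed input file
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// IndexStream scans the S2 stream and returns a serialized index.
	serialized, err := s2.IndexStream(f)
	if err != nil {
		panic(err)
	}

	// Load the serialized index and print the JSON representation shown above.
	var idx s2.Index
	if _, err := idx.Load(serialized); err != nil {
		panic(err)
	}
	fmt.Println(string(idx.JSON()))
}
```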

467 vendor/github.com/klauspost/compress/s2/lz4sconvert.go generated vendored Normal file
View File

@@ -0,0 +1,467 @@
// Copyright (c) 2022 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package s2
import (
"encoding/binary"
"fmt"
)
// LZ4sConverter provides conversion from LZ4s.
// (Intel modified LZ4 Blocks)
// https://cdrdv2-public.intel.com/743912/743912-qat-programmers-guide-v2.0.pdf
// LZ4s is a variant of LZ4 block format. LZ4s should be considered as an intermediate compressed block format.
// The LZ4s format is selected when the application sets the compType to CPA_DC_LZ4S in CpaDcSessionSetupData.
// The LZ4s block returned by the Intel® QAT hardware can be used by an external
// software post-processing to generate other compressed data formats.
// The following table lists the differences between LZ4 and LZ4s block format. LZ4s block format uses
// the same high-level formatting as LZ4 block format with the following encoding changes:
// For Min Match of 4 bytes, Copy length value 1-15 means length 4-18 with 18 bytes adding an extra byte.
// ONLY "Min match of 4 bytes" is supported.
type LZ4sConverter struct {
}
// ConvertBlock will convert an LZ4s block and append it as an S2
// block without block length to dst.
// The uncompressed size is returned as well.
// dst must have capacity to contain the entire compressed block.
func (l *LZ4sConverter) ConvertBlock(dst, src []byte) ([]byte, int, error) {
if len(src) == 0 {
return dst, 0, nil
}
const debug = false
const inline = true
const lz4MinMatch = 3
s, d := 0, len(dst)
dst = dst[:cap(dst)]
if !debug && hasAmd64Asm {
res, sz := cvtLZ4sBlockAsm(dst[d:], src)
if res < 0 {
const (
errCorrupt = -1
errDstTooSmall = -2
)
switch res {
case errCorrupt:
return nil, 0, ErrCorrupt
case errDstTooSmall:
return nil, 0, ErrDstTooSmall
default:
return nil, 0, fmt.Errorf("unexpected result: %d", res)
}
}
if d+sz > len(dst) {
return nil, 0, ErrDstTooSmall
}
return dst[:d+sz], res, nil
}
dLimit := len(dst) - 10
var lastOffset uint16
var uncompressed int
if debug {
fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
}
for {
if s >= len(src) {
return dst[:d], 0, ErrCorrupt
}
// Read literal info
token := src[s]
ll := int(token >> 4)
ml := int(lz4MinMatch + (token & 0xf))
// If upper nibble is 15, literal length is extended
if token >= 0xf0 {
for {
s++
if s >= len(src) {
if debug {
fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
}
return dst[:d], 0, ErrCorrupt
}
val := src[s]
ll += int(val)
if val != 255 {
break
}
}
}
// Skip past token
if s+ll >= len(src) {
if debug {
fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
}
return nil, 0, ErrCorrupt
}
s++
if ll > 0 {
if d+ll > dLimit {
return nil, 0, ErrDstTooSmall
}
if debug {
fmt.Printf("emit %d literals\n", ll)
}
d += emitLiteralGo(dst[d:], src[s:s+ll])
s += ll
uncompressed += ll
}
// Check if we are done...
if ml == lz4MinMatch {
if s == len(src) {
break
}
// 0 bytes.
continue
}
// 2 byte offset
if s >= len(src)-2 {
if debug {
fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
}
return nil, 0, ErrCorrupt
}
offset := binary.LittleEndian.Uint16(src[s:])
s += 2
if offset == 0 {
if debug {
fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
}
return nil, 0, ErrCorrupt
}
if int(offset) > uncompressed {
if debug {
fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
}
return nil, 0, ErrCorrupt
}
if ml == lz4MinMatch+15 {
for {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
s++
ml += int(val)
if val != 255 {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
break
}
}
}
if offset == lastOffset {
if debug {
fmt.Printf("emit repeat, length: %d, offset: %d\n", ml, offset)
}
if !inline {
d += emitRepeat16(dst[d:], offset, ml)
} else {
length := ml
dst := dst[d:]
for len(dst) > 5 {
// Repeat offset, make length cheaper
length -= 4
if length <= 4 {
dst[0] = uint8(length)<<2 | tagCopy1
dst[1] = 0
d += 2
break
}
if length < 8 && offset < 2048 {
// Encode WITH offset
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
d += 2
break
}
if length < (1<<8)+4 {
length -= 4
dst[2] = uint8(length)
dst[1] = 0
dst[0] = 5<<2 | tagCopy1
d += 3
break
}
if length < (1<<16)+(1<<8) {
length -= 1 << 8
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 6<<2 | tagCopy1
d += 4
break
}
const maxRepeat = (1 << 24) - 1
length -= 1 << 16
left := 0
if length > maxRepeat {
left = length - maxRepeat + 4
length = maxRepeat - 4
}
dst[4] = uint8(length >> 16)
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 7<<2 | tagCopy1
if left > 0 {
d += 5 + emitRepeat16(dst[5:], offset, left)
break
}
d += 5
break
}
}
} else {
if debug {
fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
}
if !inline {
d += emitCopy16(dst[d:], offset, ml)
} else {
length := ml
dst := dst[d:]
for len(dst) > 5 {
// Offset no more than 2 bytes.
if length > 64 {
off := 3
if offset < 2048 {
// emit 8 bytes as tagCopy1, rest as repeats.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
length -= 8
off = 2
} else {
// Emit a length 60 copy, encoded as 3 bytes.
// Emit remaining as repeat value (minimum 4 bytes).
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = 59<<2 | tagCopy2
length -= 60
}
// Emit remaining as repeats, at least 4 bytes remain.
d += off + emitRepeat16(dst[off:], offset, length)
break
}
if length >= 12 || offset >= 2048 {
// Emit the remaining copy, encoded as 3 bytes.
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = uint8(length-1)<<2 | tagCopy2
d += 3
break
}
// Emit the remaining copy, encoded as 2 bytes.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
d += 2
break
}
}
lastOffset = offset
}
uncompressed += ml
if d > dLimit {
return nil, 0, ErrDstTooSmall
}
}
return dst[:d], uncompressed, nil
}
// ConvertBlockSnappy will convert an LZ4s block and append it
// as a Snappy block without block length to dst.
// The uncompressed size is returned as well.
// dst must have capacity to contain the entire compressed block.
func (l *LZ4sConverter) ConvertBlockSnappy(dst, src []byte) ([]byte, int, error) {
if len(src) == 0 {
return dst, 0, nil
}
const debug = false
const lz4MinMatch = 3
s, d := 0, len(dst)
dst = dst[:cap(dst)]
// Use assembly when possible
if !debug && hasAmd64Asm {
res, sz := cvtLZ4sBlockSnappyAsm(dst[d:], src)
if res < 0 {
const (
errCorrupt = -1
errDstTooSmall = -2
)
switch res {
case errCorrupt:
return nil, 0, ErrCorrupt
case errDstTooSmall:
return nil, 0, ErrDstTooSmall
default:
return nil, 0, fmt.Errorf("unexpected result: %d", res)
}
}
if d+sz > len(dst) {
return nil, 0, ErrDstTooSmall
}
return dst[:d+sz], res, nil
}
dLimit := len(dst) - 10
var uncompressed int
if debug {
fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
}
for {
if s >= len(src) {
return nil, 0, ErrCorrupt
}
// Read literal info
token := src[s]
ll := int(token >> 4)
ml := int(lz4MinMatch + (token & 0xf))
// If upper nibble is 15, literal length is extended
if token >= 0xf0 {
for {
s++
if s >= len(src) {
if debug {
fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
ll += int(val)
if val != 255 {
break
}
}
}
// Skip past token
if s+ll >= len(src) {
if debug {
fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
}
return nil, 0, ErrCorrupt
}
s++
if ll > 0 {
if d+ll > dLimit {
return nil, 0, ErrDstTooSmall
}
if debug {
fmt.Printf("emit %d literals\n", ll)
}
d += emitLiteralGo(dst[d:], src[s:s+ll])
s += ll
uncompressed += ll
}
// Check if we are done...
if ml == lz4MinMatch {
if s == len(src) {
break
}
// 0 bytes.
continue
}
// 2 byte offset
if s >= len(src)-2 {
if debug {
fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
}
return nil, 0, ErrCorrupt
}
offset := binary.LittleEndian.Uint16(src[s:])
s += 2
if offset == 0 {
if debug {
fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
}
return nil, 0, ErrCorrupt
}
if int(offset) > uncompressed {
if debug {
fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
}
return nil, 0, ErrCorrupt
}
if ml == lz4MinMatch+15 {
for {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
s++
ml += int(val)
if val != 255 {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
break
}
}
}
if debug {
fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
}
length := ml
// d += emitCopyNoRepeat(dst[d:], int(offset), ml)
for length > 0 {
if d >= dLimit {
return nil, 0, ErrDstTooSmall
}
// Offset no more than 2 bytes.
if length > 64 {
// Emit a length 64 copy, encoded as 3 bytes.
dst[d+2] = uint8(offset >> 8)
dst[d+1] = uint8(offset)
dst[d+0] = 63<<2 | tagCopy2
length -= 64
d += 3
continue
}
if length >= 12 || offset >= 2048 || length < 4 {
// Emit the remaining copy, encoded as 3 bytes.
dst[d+2] = uint8(offset >> 8)
dst[d+1] = uint8(offset)
dst[d+0] = uint8(length-1)<<2 | tagCopy2
d += 3
break
}
// Emit the remaining copy, encoded as 2 bytes.
dst[d+1] = uint8(offset)
dst[d+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
d += 2
break
}
uncompressed += ml
if d > dLimit {
return nil, 0, ErrDstTooSmall
}
}
return dst[:d], uncompressed, nil
}
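
A hedged usage sketch for the LZ4sConverter added in this file; sizing the destination via MaxEncodedLen and the function name are assumptions:

```go
package example

import "github.com/klauspost/compress/s2"

// convertLZ4sToS2 converts a single LZ4s block to an S2 block (without block length).
// maxUncompressed is the caller's bound on the decoded size, used only to size dst.
func convertLZ4sToS2(lz4s []byte, maxUncompressed int) ([]byte, int, error) {
	var c s2.LZ4sConverter
	// ConvertBlock requires dst to have capacity for the entire converted block.
	dst := make([]byte, 0, s2.MaxEncodedLen(maxUncompressed))
	return c.ConvertBlock(dst, lz4s)
}
```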

1062 vendor/github.com/klauspost/compress/s2/reader.go generated vendored Normal file

File diff suppressed because it is too large

1020 vendor/github.com/klauspost/compress/s2/writer.go generated vendored Normal file

File diff suppressed because it is too large

View File

@@ -304,7 +304,7 @@ import "github.com/klauspost/compress/zstd"
// Create a reader that caches decompressors.
// For this operation type we supply a nil Reader.
var decoder, _ = zstd.NewReader(nil, WithDecoderConcurrency(0))
var decoder, _ = zstd.NewReader(nil, zstd.WithDecoderConcurrency(0))
// Decompress a buffer. We don't supply a destination buffer,
// so it will be allocated by the decoder.
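
The README example continues along these lines; a minimal hedged completion, reusing the shared decoder declared above:

```go
// Decompress a buffer into a newly allocated slice.
func Decompress(src []byte) ([]byte, error) {
	return decoder.DecodeAll(src, nil)
}
```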

View File

@@ -17,7 +17,6 @@ import (
// for aligning the input.
type bitReader struct {
in []byte
off uint // next byte to read is at in[off - 1]
value uint64 // Maybe use [16]byte, but shifting is awkward.
bitsRead uint8
}
@@ -28,7 +27,6 @@ func (b *bitReader) init(in []byte) error {
return errors.New("corrupt stream: too short")
}
b.in = in
b.off = uint(len(in))
// The highest bit of the last byte indicates where to start
v := in[len(in)-1]
if v == 0 {
@@ -69,21 +67,19 @@ func (b *bitReader) fillFast() {
if b.bitsRead < 32 {
return
}
// 2 bounds checks.
v := b.in[b.off-4:]
v = v[:4]
v := b.in[len(b.in)-4:]
b.in = b.in[:len(b.in)-4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
b.off -= 4
}
// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read.
func (b *bitReader) fillFastStart() {
// Do single re-slice to avoid bounds checks.
b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
v := b.in[len(b.in)-8:]
b.in = b.in[:len(b.in)-8]
b.value = binary.LittleEndian.Uint64(v)
b.bitsRead = 0
b.off -= 8
}
// fill() will make sure at least 32 bits are available.
@@ -91,25 +87,25 @@ func (b *bitReader) fill() {
if b.bitsRead < 32 {
return
}
if b.off >= 4 {
v := b.in[b.off-4:]
v = v[:4]
if len(b.in) >= 4 {
v := b.in[len(b.in)-4:]
b.in = b.in[:len(b.in)-4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
b.off -= 4
return
}
for b.off > 0 {
b.value = (b.value << 8) | uint64(b.in[b.off-1])
b.bitsRead -= 8
b.off--
b.bitsRead -= uint8(8 * len(b.in))
for len(b.in) > 0 {
b.value = (b.value << 8) | uint64(b.in[len(b.in)-1])
b.in = b.in[:len(b.in)-1]
}
}
// finished returns true if all bits have been read from the bit stream.
func (b *bitReader) finished() bool {
return b.off == 0 && b.bitsRead >= 64
return len(b.in) == 0 && b.bitsRead >= 64
}
// overread returns true if more bits have been requested than is on the stream.
@@ -119,7 +115,7 @@ func (b *bitReader) overread() bool {
// remain returns the number of bits remaining.
func (b *bitReader) remain() uint {
return b.off*8 + 64 - uint(b.bitsRead)
return 8*uint(len(b.in)) + 64 - uint(b.bitsRead)
}
// close the bitstream and returns an error if out-of-buffer reads occurred.

View File

@@ -97,12 +97,11 @@ func (b *bitWriter) flushAlign() {
// close will write the alignment bit and write the final byte(s)
// to the output.
func (b *bitWriter) close() error {
func (b *bitWriter) close() {
// End mark
b.addBits16Clean(1, 1)
// flush until next byte.
b.flushAlign()
return nil
}
// reset and continue writing by appending to out.

View File

@@ -9,6 +9,7 @@ import (
"encoding/binary"
"errors"
"fmt"
"hash/crc32"
"io"
"os"
"path/filepath"
@@ -442,6 +443,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
}
}
var err error
if debugDecoder {
println("huff table input:", len(literals), "CRC:", crc32.ChecksumIEEE(literals))
}
huff, literals, err = huff0.ReadTable(literals, huff)
if err != nil {
println("reading huffman table:", err)
@@ -588,7 +592,7 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
}
seq.fse.setRLE(symb)
if debugDecoder {
printf("RLE set to %+v, code: %v", symb, v)
printf("RLE set to 0x%x, code: %v", symb, v)
}
case compModeFSE:
println("Reading table for", tableIndex(i))

View File

@@ -361,14 +361,21 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
if len(lits) >= 1024 {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(lits, b.litEnc)
} else if len(lits) > 32 {
} else if len(lits) > 16 {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(lits, b.litEnc)
} else {
err = huff0.ErrIncompressible
}
if err == nil && len(out)+5 > len(lits) {
// If we are close, we may still be worse or equal to raw.
var lh literalsHeader
lh.setSizes(len(out), len(lits), single)
if len(out)+lh.size() >= len(lits) {
err = huff0.ErrIncompressible
}
}
switch err {
case huff0.ErrIncompressible:
if debugEncoder {
@@ -473,7 +480,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
return b.encodeLits(b.literals, rawAllLits)
}
// We want some difference to at least account for the headers.
saved := b.size - len(b.literals) - (b.size >> 5)
saved := b.size - len(b.literals) - (b.size >> 6)
if saved < 16 {
if org == nil {
return errIncompressible
@@ -503,7 +510,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if len(b.literals) >= 1024 && !raw {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
} else if len(b.literals) > 32 && !raw {
} else if len(b.literals) > 16 && !raw {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
@@ -511,6 +518,17 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
err = huff0.ErrIncompressible
}
if err == nil && len(out)+5 > len(b.literals) {
// If we are close, we may still be worse or equal to raw.
var lh literalsHeader
lh.setSize(len(b.literals))
szRaw := lh.size()
lh.setSizes(len(out), len(b.literals), single)
szComp := lh.size()
if len(out)+szComp >= len(b.literals)+szRaw {
err = huff0.ErrIncompressible
}
}
switch err {
case huff0.ErrIncompressible:
lh.setType(literalsBlockRaw)
@@ -773,16 +791,16 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
ml.flush(mlEnc.actualTableLog)
of.flush(ofEnc.actualTableLog)
ll.flush(llEnc.actualTableLog)
err = wr.close()
if err != nil {
return err
}
wr.close()
b.output = wr.out
// Maybe even add a bigger margin.
if len(b.output)-3-bhOffset >= b.size {
// Maybe even add a bigger margin.
// Discard and encode as raw block.
b.output = b.encodeRawTo(b.output[:bhOffset], org)
b.popOffsets()
b.litEnc.Reuse = huff0.ReusePolicyNone
return errIncompressible
return nil
}
// Size is output minus block header.

View File

@@ -54,7 +54,7 @@ func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
func (b *byteBuf) readByte() (byte, error) {
bb := *b
if len(bb) < 1 {
return 0, nil
return 0, io.ErrUnexpectedEOF
}
r := bb[0]
*b = bb[1:]
@@ -109,7 +109,7 @@ func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) {
}
func (r *readerWrapper) readByte() (byte, error) {
n2, err := r.r.Read(r.tmp[:1])
n2, err := io.ReadFull(r.r, r.tmp[:1])
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF

View File

@@ -455,12 +455,7 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
}
if len(next.b) > 0 {
n, err := d.current.crc.Write(next.b)
if err == nil {
if n != len(next.b) {
d.current.err = io.ErrShortWrite
}
}
d.current.crc.Write(next.b)
}
if next.err == nil && next.d != nil && next.d.hasCRC {
got := uint32(d.current.crc.Sum64())

View File

@@ -107,7 +107,7 @@ func WithDecoderDicts(dicts ...[]byte) DOption {
}
}
// WithEncoderDictRaw registers a dictionary that may be used by the decoder.
// WithDecoderDictRaw registers a dictionary that may be used by the decoder.
// The slice content can be arbitrary data.
func WithDecoderDictRaw(id uint32, content []byte) DOption {
return func(o *decoderOptions) error {
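
A hedged sketch of registering a raw dictionary for decoding; the id and content values, and the function name, are assumptions:

```go
package example

import "github.com/klauspost/compress/zstd"

// newRawDictReader registers a raw-content dictionary under the given ID so
// frames referencing that ID can be decoded. The content needs no zstd header.
func newRawDictReader(id uint32, content []byte) (*zstd.Decoder, error) {
	return zstd.NewReader(nil, zstd.WithDecoderDictRaw(id, content))
}
```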

View File

@@ -1,10 +1,13 @@
package zstd
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"sort"
"github.com/klauspost/compress/huff0"
)
@@ -14,9 +17,8 @@ type dict struct {
litEnc *huff0.Scratch
llDec, ofDec, mlDec sequenceDec
//llEnc, ofEnc, mlEnc []*fseEncoder
offsets [3]int
content []byte
offsets [3]int
content []byte
}
const dictMagic = "\x37\xa4\x30\xec"
@@ -159,3 +161,374 @@ func InspectDictionary(b []byte) (interface {
d, err := loadDict(b)
return d, err
}
type BuildDictOptions struct {
// Dictionary ID.
ID uint32
// Content to use to create dictionary tables.
Contents [][]byte
// History to use for all blocks.
History []byte
// Offsets to use.
Offsets [3]int
// CompatV155 will make the dictionary compatible with Zstd v1.5.5 and earlier.
// See https://github.com/facebook/zstd/issues/3724
CompatV155 bool
// Use the specified encoder level.
// The dictionary will be built using the specified encoder level,
// which will reflect speed and make the dictionary tailored for that level.
// If not set SpeedBestCompression will be used.
Level EncoderLevel
// DebugOut will write stats and other details here if set.
DebugOut io.Writer
}
func BuildDict(o BuildDictOptions) ([]byte, error) {
initPredefined()
hist := o.History
contents := o.Contents
debug := o.DebugOut != nil
println := func(args ...interface{}) {
if o.DebugOut != nil {
fmt.Fprintln(o.DebugOut, args...)
}
}
printf := func(s string, args ...interface{}) {
if o.DebugOut != nil {
fmt.Fprintf(o.DebugOut, s, args...)
}
}
print := func(args ...interface{}) {
if o.DebugOut != nil {
fmt.Fprint(o.DebugOut, args...)
}
}
if int64(len(hist)) > dictMaxLength {
return nil, fmt.Errorf("dictionary of size %d > %d", len(hist), int64(dictMaxLength))
}
if len(hist) < 8 {
return nil, fmt.Errorf("dictionary of size %d < %d", len(hist), 8)
}
if len(contents) == 0 {
return nil, errors.New("no content provided")
}
d := dict{
id: o.ID,
litEnc: nil,
llDec: sequenceDec{},
ofDec: sequenceDec{},
mlDec: sequenceDec{},
offsets: o.Offsets,
content: hist,
}
block := blockEnc{lowMem: false}
block.init()
enc := encoder(&bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(maxMatchLen), bufferReset: math.MaxInt32 - int32(maxMatchLen*2), lowMem: false}})
if o.Level != 0 {
eOpts := encoderOptions{
level: o.Level,
blockSize: maxMatchLen,
windowSize: maxMatchLen,
dict: &d,
lowMem: false,
}
enc = eOpts.encoder()
} else {
o.Level = SpeedBestCompression
}
var (
remain [256]int
ll [256]int
ml [256]int
of [256]int
)
addValues := func(dst *[256]int, src []byte) {
for _, v := range src {
dst[v]++
}
}
addHist := func(dst *[256]int, src *[256]uint32) {
for i, v := range src {
dst[i] += int(v)
}
}
seqs := 0
nUsed := 0
litTotal := 0
newOffsets := make(map[uint32]int, 1000)
for _, b := range contents {
block.reset(nil)
if len(b) < 8 {
continue
}
nUsed++
enc.Reset(&d, true)
enc.Encode(&block, b)
addValues(&remain, block.literals)
litTotal += len(block.literals)
seqs += len(block.sequences)
block.genCodes()
addHist(&ll, block.coders.llEnc.Histogram())
addHist(&ml, block.coders.mlEnc.Histogram())
addHist(&of, block.coders.ofEnc.Histogram())
for i, seq := range block.sequences {
if i > 3 {
break
}
offset := seq.offset
if offset == 0 {
continue
}
if offset > 3 {
newOffsets[offset-3]++
} else {
newOffsets[uint32(o.Offsets[offset-1])]++
}
}
}
// Find most used offsets.
var sortedOffsets []uint32
for k := range newOffsets {
sortedOffsets = append(sortedOffsets, k)
}
sort.Slice(sortedOffsets, func(i, j int) bool {
a, b := sortedOffsets[i], sortedOffsets[j]
if a == b {
// Prefer the longer offset
return sortedOffsets[i] > sortedOffsets[j]
}
return newOffsets[sortedOffsets[i]] > newOffsets[sortedOffsets[j]]
})
if len(sortedOffsets) > 3 {
if debug {
print("Offsets:")
for i, v := range sortedOffsets {
if i > 20 {
break
}
printf("[%d: %d],", v, newOffsets[v])
}
println("")
}
sortedOffsets = sortedOffsets[:3]
}
for i, v := range sortedOffsets {
o.Offsets[i] = int(v)
}
if debug {
println("New repeat offsets", o.Offsets)
}
if nUsed == 0 || seqs == 0 {
return nil, fmt.Errorf("%d blocks, %d sequences found", nUsed, seqs)
}
if debug {
println("Sequences:", seqs, "Blocks:", nUsed, "Literals:", litTotal)
}
if seqs/nUsed < 512 {
// Use 512 as minimum.
nUsed = seqs / 512
}
copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) {
hist := dst.Histogram()
var maxSym uint8
var maxCount int
var fakeLength int
for i, v := range src {
if v > 0 {
v = v / nUsed
if v == 0 {
v = 1
}
}
if v > maxCount {
maxCount = v
}
if v != 0 {
maxSym = uint8(i)
}
fakeLength += v
hist[i] = uint32(v)
}
dst.HistogramFinished(maxSym, maxCount)
dst.reUsed = false
dst.useRLE = false
err := dst.normalizeCount(fakeLength)
if err != nil {
return nil, err
}
if debug {
println("RAW:", dst.count[:maxSym+1], "NORM:", dst.norm[:maxSym+1], "LEN:", fakeLength)
}
return dst.writeCount(nil)
}
if debug {
print("Literal lengths: ")
}
llTable, err := copyHist(block.coders.llEnc, &ll)
if err != nil {
return nil, err
}
if debug {
print("Match lengths: ")
}
mlTable, err := copyHist(block.coders.mlEnc, &ml)
if err != nil {
return nil, err
}
if debug {
print("Offsets: ")
}
ofTable, err := copyHist(block.coders.ofEnc, &of)
if err != nil {
return nil, err
}
// Literal table
avgSize := litTotal
if avgSize > huff0.BlockSizeMax/2 {
avgSize = huff0.BlockSizeMax / 2
}
huffBuff := make([]byte, 0, avgSize)
// Target size
div := litTotal / avgSize
if div < 1 {
div = 1
}
if debug {
println("Huffman weights:")
}
for i, n := range remain[:] {
if n > 0 {
n = n / div
// Allow all entries to be represented.
if n == 0 {
n = 1
}
huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
if debug {
printf("[%d: %d], ", i, n)
}
}
}
if o.CompatV155 && remain[255]/div == 0 {
huffBuff = append(huffBuff, 255)
}
scratch := &huff0.Scratch{TableLog: 11}
for tries := 0; tries < 255; tries++ {
scratch = &huff0.Scratch{TableLog: 11}
_, _, err = huff0.Compress1X(huffBuff, scratch)
if err == nil {
break
}
if debug {
printf("Try %d: Huffman error: %v\n", tries+1, err)
}
huffBuff = huffBuff[:0]
if tries == 250 {
if debug {
println("Huffman: Bailing out with predefined table")
}
// Bail out.... Just generate something
huffBuff = append(huffBuff, bytes.Repeat([]byte{255}, 10000)...)
for i := 0; i < 128; i++ {
huffBuff = append(huffBuff, byte(i))
}
continue
}
if errors.Is(err, huff0.ErrIncompressible) {
// Try truncating least common.
for i, n := range remain[:] {
if n > 0 {
n = n / (div * (i + 1))
if n > 0 {
huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
}
}
}
if o.CompatV155 && len(huffBuff) > 0 && huffBuff[len(huffBuff)-1] != 255 {
huffBuff = append(huffBuff, 255)
}
if len(huffBuff) == 0 {
huffBuff = append(huffBuff, 0, 255)
}
}
if errors.Is(err, huff0.ErrUseRLE) {
for i, n := range remain[:] {
n = n / (div * (i + 1))
// Allow all entries to be represented.
if n == 0 {
n = 1
}
huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
}
}
}
var out bytes.Buffer
out.Write([]byte(dictMagic))
out.Write(binary.LittleEndian.AppendUint32(nil, o.ID))
out.Write(scratch.OutTable)
if debug {
println("huff table:", len(scratch.OutTable), "bytes")
println("of table:", len(ofTable), "bytes")
println("ml table:", len(mlTable), "bytes")
println("ll table:", len(llTable), "bytes")
}
out.Write(ofTable)
out.Write(mlTable)
out.Write(llTable)
out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[0])))
out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[1])))
out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[2])))
out.Write(hist)
if debug {
_, err := loadDict(out.Bytes())
if err != nil {
panic(err)
}
i, err := InspectDictionary(out.Bytes())
if err != nil {
panic(err)
}
println("ID:", i.ID())
println("Content size:", i.ContentSize())
println("Encoder:", i.LitEncoder() != nil)
println("Offsets:", i.Offsets())
var totalSize int
for _, b := range contents {
totalSize += len(b)
}
encWith := func(opts ...EOption) int {
enc, err := NewWriter(nil, opts...)
if err != nil {
panic(err)
}
defer enc.Close()
var dst []byte
var totalSize int
for _, b := range contents {
dst = enc.EncodeAll(b, dst[:0])
totalSize += len(dst)
}
return totalSize
}
plain := encWith(WithEncoderLevel(o.Level))
withDict := encWith(WithEncoderLevel(o.Level), WithEncoderDict(out.Bytes()))
println("Input size:", totalSize)
println("Plain Compressed:", plain)
println("Dict Compressed:", withDict)
println("Saved:", plain-withDict, (plain-withDict)/len(contents), "bytes per input (rounded down)")
}
return out.Bytes(), nil
}
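
A hedged usage sketch of BuildDict as declared above; the dictionary ID, offsets, and sample handling are illustrative assumptions:

```go
package example

import "github.com/klauspost/compress/zstd"

// buildAndUseDict builds a dictionary from sample blocks plus a shared history
// (>= 8 bytes), then encodes one sample with it to gauge the effect.
func buildAndUseDict(samples [][]byte, history []byte) ([]byte, error) {
	dict, err := zstd.BuildDict(zstd.BuildDictOptions{
		ID:       0x11223344,      // arbitrary dictionary ID
		Contents: samples,         // blocks used to build the tables
		History:  history,         // history available to every block
		Offsets:  [3]int{1, 4, 8}, // default starting repeat offsets
		Level:    zstd.SpeedBestCompression,
	})
	if err != nil {
		return nil, err
	}
	// Encode with the freshly built dictionary.
	enc, err := zstd.NewWriter(nil, zstd.WithEncoderDict(dict))
	if err != nil {
		return nil, err
	}
	defer enc.Close()
	return enc.EncodeAll(samples[0], nil), nil
}
```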

View File

@@ -144,6 +144,7 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) {
} else {
e.crc.Reset()
}
e.blk.dictLitEnc = nil
if d != nil {
low := e.lowMem
if singleBlock {

View File

@@ -32,10 +32,9 @@ type match struct {
length int32
rep int32
est int32
_ [12]byte // Aligned size to cache line: 4+4+4+4+4 bytes + 12 bytes padding = 32 bytes
}
const highScore = 25000
const highScore = maxMatchLen * 8
// estBits will estimate output bits from predefined tables.
func (m *match) estBits(bitsPerByte int32) {
@@ -160,7 +159,6 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
cv := load6432(src, s)
// Relative offsets
offset1 := int32(blk.recentOffsets[0])
@@ -174,7 +172,6 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
_ = addLiterals
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
@@ -189,53 +186,96 @@ encodeLoop:
panic("offset0 was 0")
}
bestOf := func(a, b *match) *match {
if a.est-b.est+(a.s-b.s)*bitsPerByte>>10 < 0 {
return a
}
return b
}
const goodEnough = 100
const goodEnough = 250
cv := load6432(src, s)
nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
matchAt := func(offset int32, s int32, first uint32, rep int32) match {
// Set m to a match at offset if it looks like that will improve compression.
improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
return match{s: s, est: highScore}
return
}
if debugAsserts {
if offset <= 0 {
panic(offset)
}
if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
}
}
m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
m.estBits(bitsPerByte)
return m
// Try to quick reject if we already have a long match.
if m.length > 16 {
left := len(src) - int(m.s+m.length)
// If we are too close to the end, keep as is.
if left <= 0 {
return
}
checkLen := m.length - (s - m.s) - 8
if left > 2 && checkLen > 4 {
// Check 4 bytes, 4 bytes from the end of the current match.
a := load3232(src, offset+checkLen)
b := load3232(src, s+checkLen)
if a != b {
return
}
}
}
l := 4 + e.matchlen(s+4, offset+4, src)
if rep < 0 {
// Extend candidate match backwards as far as possible.
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
for offset > tMin && s > nextEmit && src[offset-1] == src[s-1] && l < maxMatchLength {
s--
offset--
l++
}
}
cand := match{offset: offset, s: s, length: l, rep: rep}
cand.estBits(bitsPerByte)
if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
*m = cand
}
}
m1 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)
m2 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)
m3 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)
m4 := matchAt(candidateS.prev-e.cur, s, uint32(cv), -1)
best := bestOf(bestOf(&m1, &m2), bestOf(&m3, &m4))
best := match{s: s, est: highScore}
improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1)
improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1)
improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1)
improve(&best, candidateS.prev-e.cur, s, uint32(cv), -1)
if canRepeat && best.length < goodEnough {
cv32 := uint32(cv >> 8)
spp := s + 1
m1 := matchAt(spp-offset1, spp, cv32, 1)
m2 := matchAt(spp-offset2, spp, cv32, 2)
m3 := matchAt(spp-offset3, spp, cv32, 3)
best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
if best.length > 0 {
cv32 = uint32(cv >> 24)
spp += 2
m1 := matchAt(spp-offset1, spp, cv32, 1)
m2 := matchAt(spp-offset2, spp, cv32, 2)
m3 := matchAt(spp-offset3, spp, cv32, 3)
best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
if s == nextEmit {
// Check repeats straight after a match.
improve(&best, s-offset2, s, uint32(cv), 1|4)
improve(&best, s-offset3, s, uint32(cv), 2|4)
if offset1 > 1 {
improve(&best, s-(offset1-1), s, uint32(cv), 3|4)
}
}
// If either no match or a non-repeat match, check at + 1
if best.rep <= 0 {
cv32 := uint32(cv >> 8)
spp := s + 1
improve(&best, spp-offset1, spp, cv32, 1)
improve(&best, spp-offset2, spp, cv32, 2)
improve(&best, spp-offset3, spp, cv32, 3)
if best.rep < 0 {
cv32 = uint32(cv >> 24)
spp += 2
improve(&best, spp-offset1, spp, cv32, 1)
improve(&best, spp-offset2, spp, cv32, 2)
improve(&best, spp-offset3, spp, cv32, 3)
}
}
}
// Load next and check...
@@ -250,47 +290,45 @@ encodeLoop:
if s >= sLimit {
break encodeLoop
}
cv = load6432(src, s)
continue
}
s++
candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)]
cv = load6432(src, s)
cv2 := load6432(src, s+1)
cv = load6432(src, s+1)
cv2 := load6432(src, s+2)
candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]
// Short at s+1
m1 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)
improve(&best, candidateS.offset-e.cur, s+1, uint32(cv), -1)
// Long at s+1, s+2
m2 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)
m3 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)
m4 := matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1)
m5 := matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1)
best = bestOf(bestOf(bestOf(best, &m1), &m2), bestOf(bestOf(&m3, &m4), &m5))
improve(&best, candidateL.offset-e.cur, s+1, uint32(cv), -1)
improve(&best, candidateL.prev-e.cur, s+1, uint32(cv), -1)
improve(&best, candidateL2.offset-e.cur, s+2, uint32(cv2), -1)
improve(&best, candidateL2.prev-e.cur, s+2, uint32(cv2), -1)
if false {
// Short at s+3.
// Too often worse...
m := matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)
best = bestOf(best, &m)
improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+3, uint32(cv2>>8), -1)
}
// See if we can find a better match by checking where the current best ends.
// Use that offset to see if we can find a better full match.
if sAt := best.s + best.length; sAt < sLimit {
nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
candidateEnd := e.longTable[nextHashL]
// Start check at a fixed offset to allow for a few mismatches.
// For this compression level 2 yields the best results.
const skipBeginning = 2
if pos := candidateEnd.offset - e.cur - best.length + skipBeginning; pos >= 0 {
m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
bestEnd := bestOf(best, &m)
if pos := candidateEnd.prev - e.cur - best.length + skipBeginning; pos >= 0 {
m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
bestEnd = bestOf(bestEnd, &m)
// Start check at a fixed offset to allow for a few mismatches.
// For this compression level 2 yields the best results.
// We cannot do this if we have already indexed this position.
const skipBeginning = 2
if best.s > s-skipBeginning {
// See if we can find a better match by checking where the current best ends.
// Use that offset to see if we can find a better full match.
if sAt := best.s + best.length; sAt < sLimit {
nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
candidateEnd := e.longTable[nextHashL]
if off := candidateEnd.offset - e.cur - best.length + skipBeginning; off >= 0 {
improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
if off := candidateEnd.prev - e.cur - best.length + skipBeginning; off >= 0 {
improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
}
}
best = bestEnd
}
}
}
@@ -303,51 +341,34 @@ encodeLoop:
// We have a match, we can store the forward value
if best.rep > 0 {
s = best.s
var seq seq
seq.matchLen = uint32(best.length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
start := best.s
// We end the search early, so we don't risk 0 literals
// and have to do special offset treatment.
startLimit := nextEmit + 1
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
if debugAsserts && s <= nextEmit {
panic("s <= nextEmit")
}
repIndex := best.offset
for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
repIndex--
start--
seq.matchLen++
}
addLiterals(&seq, start)
addLiterals(&seq, best.s)
// rep 0
seq.offset = uint32(best.rep)
// Repeat. If bit 4 is set, this is a non-lit repeat.
seq.offset = uint32(best.rep & 3)
if debugSequences {
println("repeat sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
// Index match start+1 (long) -> s - 1
index0 := s
// Index old s + 1 -> s - 1
index0 := s + 1
s = best.s + best.length
nextEmit = s
if s >= sLimit {
if debugEncoder {
println("repeat ended", s, best.length)
}
break encodeLoop
}
// Index skipped...
off := index0 + e.cur
for index0 < s-1 {
for index0 < s {
cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@@ -357,17 +378,19 @@ encodeLoop:
index0++
}
switch best.rep {
case 2:
case 2, 4 | 1:
offset1, offset2 = offset2, offset1
case 3:
case 3, 4 | 2:
offset1, offset2, offset3 = offset3, offset1, offset2
case 4 | 3:
offset1, offset2, offset3 = offset1-1, offset1, offset2
}
cv = load6432(src, s)
continue
}
// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
index0 := s + 1
s = best.s
t := best.offset
offset1, offset2, offset3 = s-t, offset1, offset2
@@ -380,22 +403,9 @@ encodeLoop:
panic("invalid offset")
}
// Extend the n-byte match as long as possible.
l := best.length
// Extend backwards
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
s--
t--
l++
}
// Write our sequence
var seq seq
l := best.length
seq.litLen = uint32(s - nextEmit)
seq.matchLen = uint32(l - zstdMinMatch)
if seq.litLen > 0 {
@@ -412,10 +422,8 @@ encodeLoop:
break encodeLoop
}
// Index match start+1 (long) -> s - 1
index0 := s - l + 1
// every entry
for index0 < s-1 {
// Index old s + 1 -> s - 1
for index0 < s {
cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@@ -424,50 +432,6 @@ encodeLoop:
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
index0++
}
cv = load6432(src, s)
if !canRepeat {
continue
}
// Check offset 2
for {
o2 := s - offset2
if load3232(src, o2) != uint32(cv) {
// Do regular search
break
}
// Store this, since we have it.
nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src)
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: e.table[nextHashS].offset}
seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0
// Since litlen is always 0, this is offset 1.
seq.offset = 1
s += l
nextEmit = s
if debugSequences {
println("sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
// Swap offset 1 and 2.
offset1, offset2 = offset2, offset1
if s >= sLimit {
// Finished
break encodeLoop
}
cv = load6432(src, s)
}
}
if int(nextEmit) < len(src) {

View File

@@ -1084,7 +1084,7 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
}
}
e.lastDictID = d.id
e.allDirty = true
allDirty = true
}
// Reset table to initial state
e.cur = e.maxMatchOff

View File

@@ -133,8 +133,7 @@ encodeLoop:
if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
var length int32
length = 4 + e.matchlen(s+6, repIndex+4, src)
length := 4 + e.matchlen(s+6, repIndex+4, src)
seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
@@ -645,8 +644,7 @@ encodeLoop:
if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
var length int32
length = 4 + e.matchlen(s+6, repIndex+4, src)
length := 4 + e.matchlen(s+6, repIndex+4, src)
seq.matchLen = uint32(length - zstdMinMatch)
@@ -831,13 +829,12 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
}
if true {
end := e.maxMatchOff + int32(len(d.content)) - 8
for i := e.maxMatchOff; i < end; i += 3 {
for i := e.maxMatchOff; i < end; i += 2 {
const hashLog = tableBits
cv := load6432(d.content, i-e.maxMatchOff)
nextHash := hashLen(cv, hashLog, tableFastHashLen) // 0 -> 5
nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 6
nextHash2 := hashLen(cv>>16, hashLog, tableFastHashLen) // 2 -> 7
nextHash := hashLen(cv, hashLog, tableFastHashLen) // 0 -> 6
nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 7
e.dictTable[nextHash] = tableEntry{
val: uint32(cv),
offset: i,
@@ -846,10 +843,6 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
val: uint32(cv >> 8),
offset: i + 1,
}
e.dictTable[nextHash2] = tableEntry{
val: uint32(cv >> 16),
offset: i + 2,
}
}
}
e.lastDictID = d.id

View File

@@ -227,10 +227,7 @@ func (e *Encoder) nextBlock(final bool) error {
DictID: e.o.dict.ID(),
}
dst, err := fh.appendTo(tmp[:0])
if err != nil {
return err
}
dst := fh.appendTo(tmp[:0])
s.headerWritten = true
s.wWg.Wait()
var n2 int
@@ -277,23 +274,9 @@ func (e *Encoder) nextBlock(final bool) error {
s.eofWritten = true
}
err := errIncompressible
// If we got the exact same number of literals as input,
// assume the literals cannot be compressed.
if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
}
switch err {
case errIncompressible:
if debugEncoder {
println("Storing incompressible block as raw")
}
blk.encodeRaw(src)
// In fast mode, we do not transfer offsets, so we don't have to deal with changing the.
case nil:
default:
s.err = err
return err
s.err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
if s.err != nil {
return s.err
}
_, s.err = s.w.Write(blk.output)
s.nWritten += int64(len(blk.output))
@@ -343,22 +326,8 @@ func (e *Encoder) nextBlock(final bool) error {
}
s.wWg.Done()
}()
err := errIncompressible
// If we got the exact same number of literals as input,
// assume the literals cannot be compressed.
if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
}
switch err {
case errIncompressible:
if debugEncoder {
println("Storing incompressible block as raw")
}
blk.encodeRaw(src)
// In fast mode, we do not transfer offsets, so we don't have to deal with changing the.
case nil:
default:
s.writeErr = err
s.writeErr = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
if s.writeErr != nil {
return
}
_, s.writeErr = s.w.Write(blk.output)
@@ -511,7 +480,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
Checksum: false,
DictID: 0,
}
dst, _ = fh.appendTo(dst)
dst = fh.appendTo(dst)
// Write raw block as last one only.
var blk blockHeader
@@ -546,10 +515,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem {
dst = make([]byte, 0, len(src))
}
dst, err := fh.appendTo(dst)
if err != nil {
panic(err)
}
dst = fh.appendTo(dst)
// If we can do everything in one block, prefer that.
if len(src) <= e.o.blockSize {
@@ -568,25 +534,15 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
// If we got the exact same number of literals as input,
// assume the literals cannot be compressed.
err := errIncompressible
oldout := blk.output
if len(blk.literals) != len(src) || len(src) != e.o.blockSize {
// Output directly to dst
blk.output = dst
err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
}
// Output directly to dst
blk.output = dst
switch err {
case errIncompressible:
if debugEncoder {
println("Storing incompressible block as raw")
}
dst = blk.encodeRawTo(dst, src)
case nil:
dst = blk.output
default:
err := blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
if err != nil {
panic(err)
}
dst = blk.output
blk.output = oldout
} else {
enc.Reset(e.o.dict, false)
@@ -605,25 +561,11 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
if len(src) == 0 {
blk.last = true
}
err := errIncompressible
// If we got the exact same number of literals as input,
// assume the literals cannot be compressed.
if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize {
err = blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy)
}
switch err {
case errIncompressible:
if debugEncoder {
println("Storing incompressible block as raw")
}
dst = blk.encodeRawTo(dst, todo)
blk.popOffsets()
case nil:
dst = append(dst, blk.output...)
default:
err := blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy)
if err != nil {
panic(err)
}
dst = append(dst, blk.output...)
blk.reset(nil)
}
}
@@ -633,6 +575,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
// Add padding with content from crypto/rand.Reader
if e.o.pad > 0 {
add := calcSkippableFrame(int64(len(dst)), int64(e.o.pad))
var err error
dst, err = skippableFrame(dst, add, rand.Reader)
if err != nil {
panic(err)

View File

@@ -39,7 +39,7 @@ func (o *encoderOptions) setDefault() {
blockSize: maxCompressedBlockSize,
windowSize: 8 << 20,
level: SpeedDefault,
allLitEntropy: true,
allLitEntropy: false,
lowMem: false,
}
}
@@ -129,7 +129,7 @@ func WithEncoderPadding(n int) EOption {
}
// No need to waste our time.
if n == 1 {
o.pad = 0
n = 0
}
if n > 1<<30 {
return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ")
@@ -238,7 +238,7 @@ func WithEncoderLevel(l EncoderLevel) EOption {
}
}
if !o.customALEntropy {
o.allLitEntropy = l > SpeedFastest
o.allLitEntropy = l > SpeedDefault
}
return nil
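The option changes above are easiest to see from the caller's side: WithEncoderPadding(1) is now normalized to "no padding", and literal entropy compression (allLitEntropy) only defaults on for levels above SpeedDefault. A minimal usage sketch of these public options (the sketch itself is not part of the diff):
package main

import (
	"bytes"
	"log"

	"github.com/klauspost/compress/zstd"
)

func main() {
	var buf bytes.Buffer
	enc, err := zstd.NewWriter(&buf,
		// Level above SpeedDefault: allLitEntropy defaults to on after this change.
		zstd.WithEncoderLevel(zstd.SpeedBestCompression),
		// Padding of 1 byte is now treated as "no padding".
		zstd.WithEncoderPadding(1),
	)
	if err != nil {
		log.Fatal(err)
	}
	if _, err := enc.Write([]byte("hello, zstd")); err != nil {
		log.Fatal(err)
	}
	if err := enc.Close(); err != nil {
		log.Fatal(err)
	}
	log.Println("compressed size:", buf.Len())
}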

View File

@@ -73,20 +73,20 @@ func (d *frameDec) reset(br byteBuffer) error {
switch err {
case io.EOF, io.ErrUnexpectedEOF:
return io.EOF
default:
return err
case nil:
signature[0] = b[0]
default:
return err
}
// Read the rest, don't allow io.ErrUnexpectedEOF
b, err = br.readSmall(3)
switch err {
case io.EOF:
return io.EOF
default:
return err
case nil:
copy(signature[1:], b)
default:
return err
}
if string(signature[1:4]) != skippableFrameMagic || signature[0]&0xf0 != 0x50 {
@@ -293,13 +293,9 @@ func (d *frameDec) next(block *blockDec) error {
return nil
}
// checkCRC will check the checksum if the frame has one.
// checkCRC will check the checksum, assuming the frame has one.
// Will return ErrCRCMismatch if crc check failed, otherwise nil.
func (d *frameDec) checkCRC() error {
if !d.HasCheckSum {
return nil
}
// We can overwrite upper tmp now
buf, err := d.rawInput.readSmall(4)
if err != nil {
@@ -307,10 +303,6 @@ func (d *frameDec) checkCRC() error {
return err
}
if d.o.ignoreChecksum {
return nil
}
want := binary.LittleEndian.Uint32(buf[:4])
got := uint32(d.crc.Sum64())
@@ -326,17 +318,13 @@ func (d *frameDec) checkCRC() error {
return nil
}
// consumeCRC reads the checksum data if the frame has one.
// consumeCRC skips over the checksum, assuming the frame has one.
func (d *frameDec) consumeCRC() error {
if d.HasCheckSum {
_, err := d.rawInput.readSmall(4)
if err != nil {
println("CRC missing?", err)
return err
}
_, err := d.rawInput.readSmall(4)
if err != nil {
println("CRC missing?", err)
}
return nil
return err
}
// runDecoder will run the decoder for the remainder of the frame.
@@ -415,15 +403,8 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
if d.o.ignoreChecksum {
err = d.consumeCRC()
} else {
var n int
n, err = d.crc.Write(dst[crcStart:])
if err == nil {
if n != len(dst)-crcStart {
err = io.ErrShortWrite
} else {
err = d.checkCRC()
}
}
d.crc.Write(dst[crcStart:])
err = d.checkCRC()
}
}
}

View File

@@ -22,7 +22,7 @@ type frameHeader struct {
const maxHeaderSize = 14
func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
func (f frameHeader) appendTo(dst []byte) []byte {
dst = append(dst, frameMagic...)
var fhd uint8
if f.Checksum {
@@ -88,7 +88,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
default:
panic("invalid fcs")
}
return dst, nil
return dst
}
const skippableFrameHeader = 4 + 4

View File

@@ -0,0 +1,16 @@
//go:build amd64 && !appengine && !noasm && gc
// +build amd64,!appengine,!noasm,gc
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
package zstd
// matchLen returns how many bytes match in a and b
//
// It assumes that:
//
// len(a) <= len(b) and len(a) > 0
//
//go:noescape
func matchLen(a []byte, b []byte) int

View File

@@ -0,0 +1,68 @@
// Copied from S2 implementation.
//go:build !appengine && !noasm && gc && !noasm
#include "textflag.h"
// func matchLen(a []byte, b []byte) int
// Requires: BMI
TEXT ·matchLen(SB), NOSPLIT, $0-56
MOVQ a_base+0(FP), AX
MOVQ b_base+24(FP), CX
MOVQ a_len+8(FP), DX
// matchLen
XORL SI, SI
CMPL DX, $0x08
JB matchlen_match4_standalone
matchlen_loopback_standalone:
MOVQ (AX)(SI*1), BX
XORQ (CX)(SI*1), BX
TESTQ BX, BX
JZ matchlen_loop_standalone
#ifdef GOAMD64_v3
TZCNTQ BX, BX
#else
BSFQ BX, BX
#endif
SARQ $0x03, BX
LEAL (SI)(BX*1), SI
JMP gen_match_len_end
matchlen_loop_standalone:
LEAL -8(DX), DX
LEAL 8(SI), SI
CMPL DX, $0x08
JAE matchlen_loopback_standalone
matchlen_match4_standalone:
CMPL DX, $0x04
JB matchlen_match2_standalone
MOVL (AX)(SI*1), BX
CMPL (CX)(SI*1), BX
JNE matchlen_match2_standalone
LEAL -4(DX), DX
LEAL 4(SI), SI
matchlen_match2_standalone:
CMPL DX, $0x02
JB matchlen_match1_standalone
MOVW (AX)(SI*1), BX
CMPW (CX)(SI*1), BX
JNE matchlen_match1_standalone
LEAL -2(DX), DX
LEAL 2(SI), SI
matchlen_match1_standalone:
CMPL DX, $0x01
JB gen_match_len_end
MOVB (AX)(SI*1), BL
CMPB (CX)(SI*1), BL
JNE gen_match_len_end
INCL SI
gen_match_len_end:
MOVQ SI, ret+48(FP)
RET

View File

@@ -0,0 +1,33 @@
//go:build !amd64 || appengine || !gc || noasm
// +build !amd64 appengine !gc noasm
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
package zstd
import (
"encoding/binary"
"math/bits"
)
// matchLen returns the maximum common prefix length of a and b.
// a must be the shortest of the two.
func matchLen(a, b []byte) (n int) {
for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
if diff != 0 {
return n + bits.TrailingZeros64(diff)>>3
}
n += 8
}
for i := range a {
if a[i] != b[i] {
break
}
n++
}
return n
}
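The loop above compares eight bytes at a time: the XOR of the two little-endian words is zero while the bytes match, and once it becomes non-zero, the trailing zero bit count divided by eight gives the number of leading bytes that still matched. A small standalone sketch of that one step (illustrative only; prefixLen8 is a hypothetical helper, not the package-internal matchLen):
package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

// prefixLen8 mirrors one 8-byte step of matchLen above: while the XOR of the
// two little-endian words is zero the bytes match; otherwise each group of
// eight trailing zero bits is one matching byte.
func prefixLen8(a, b []byte) int {
	diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
	if diff == 0 {
		return 8
	}
	return bits.TrailingZeros64(diff) >> 3
}

func main() {
	fmt.Println(prefixLen8([]byte("compress"), []byte("compose!"))) // 4: "comp" matches, then 'r' != 'o'
}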

View File

@@ -236,13 +236,16 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
maxBlockSize = s.windowSize
}
if debugDecoder {
println("decodeSync: decoding", seqs, "sequences", br.remain(), "bits remain on stream")
}
for i := seqs - 1; i >= 0; i-- {
if br.overread() {
printf("reading sequence %d, exceeded available data\n", seqs-i)
printf("reading sequence %d, exceeded available data. Overread by %d\n", seqs-i, -br.remain())
return io.ErrUnexpectedEOF
}
var ll, mo, ml int
if br.off > 4+((maxOffsetBits+16+16)>>3) {
if len(br.in) > 4+((maxOffsetBits+16+16)>>3) {
// inlined function:
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
@@ -314,9 +317,6 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
}
size := ll + ml + len(out)
if size-startSize > maxBlockSize {
if size-startSize == 424242 {
panic("here")
}
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
if size > cap(out) {
@@ -427,8 +427,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
}
}
// Check if space for literals
if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize {
if size := len(s.literals) + len(out) - startSize; size > maxBlockSize {
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
@@ -453,18 +452,13 @@ func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol)
// extra bits are stored in reverse order.
br.fill()
if s.maxBits <= 32 {
mo += br.getBits(moB)
ml += br.getBits(mlB)
ll += br.getBits(llB)
} else {
mo += br.getBits(moB)
mo += br.getBits(moB)
if s.maxBits > 32 {
br.fill()
// matchlength+literal length, max 32 bits
ml += br.getBits(mlB)
ll += br.getBits(llB)
}
// matchlength+literal length, max 32 bits
ml += br.getBits(mlB)
ll += br.getBits(llB)
mo = s.adjustOffset(mo, ll, moB)
return
}

View File

@@ -5,6 +5,7 @@ package zstd
import (
"fmt"
"io"
"github.com/klauspost/compress/internal/cpuinfo"
)
@@ -134,6 +135,9 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available",
ctx.ll, ctx.litRemain+ctx.ll)
case errorOverread:
return true, io.ErrUnexpectedEOF
case errorNotEnoughSpace:
size := ctx.outPosition + ctx.ll + ctx.ml
if debugDecoder {
@@ -148,7 +152,6 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
s.seqSize += ctx.litRemain
if s.seqSize > maxBlockSize {
return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
err := br.close()
if err != nil {
@@ -203,6 +206,9 @@ const errorNotEnoughLiterals = 4
// error reported when capacity of `out` is too small
const errorNotEnoughSpace = 5
// error reported when bits are overread.
const errorOverread = 6
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
//
// Please refer to seqdec_generic.go for the reference implementation.
@@ -248,6 +254,10 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
litRemain: len(s.literals),
}
if debugDecoder {
println("decode: decoding", len(seqs), "sequences", br.remain(), "bits remain on stream")
}
s.seqSize = 0
lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56
var errCode int
@@ -278,6 +288,8 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
case errorNotEnoughLiterals:
ll := ctx.seqs[i].ll
return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll)
case errorOverread:
return io.ErrUnexpectedEOF
}
return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode)
@@ -292,6 +304,9 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
if s.seqSize > maxBlockSize {
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
if debugDecoder {
println("decode: ", br.remain(), "bits remain on stream. code:", errCode)
}
err := br.close()
if err != nil {
printf("Closing sequences: %v, %+v\n", err, *br)

View File

@@ -5,11 +5,11 @@
// func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV
TEXT ·sequenceDecs_decode_amd64(SB), $8-32
MOVQ br+8(FP), AX
MOVQ 32(AX), DX
MOVBQZX 40(AX), BX
MOVQ 24(AX), SI
MOVQ (AX), AX
MOVQ br+8(FP), CX
MOVQ 24(CX), DX
MOVBQZX 32(CX), BX
MOVQ (CX), AX
MOVQ 8(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@@ -38,7 +38,7 @@ sequenceDecs_decode_amd64_main_loop:
sequenceDecs_decode_amd64_fill_byte_by_byte:
CMPQ SI, $0x00
JLE sequenceDecs_decode_amd64_fill_end
JLE sequenceDecs_decode_amd64_fill_check_overread
CMPQ BX, $0x07
JLE sequenceDecs_decode_amd64_fill_end
SHLQ $0x08, DX
@@ -49,6 +49,10 @@ sequenceDecs_decode_amd64_fill_byte_by_byte:
ORQ AX, DX
JMP sequenceDecs_decode_amd64_fill_byte_by_byte
sequenceDecs_decode_amd64_fill_check_overread:
CMPQ BX, $0x40
JA error_overread
sequenceDecs_decode_amd64_fill_end:
// Update offset
MOVQ R9, AX
@@ -105,7 +109,7 @@ sequenceDecs_decode_amd64_ml_update_zero:
sequenceDecs_decode_amd64_fill_2_byte_by_byte:
CMPQ SI, $0x00
JLE sequenceDecs_decode_amd64_fill_2_end
JLE sequenceDecs_decode_amd64_fill_2_check_overread
CMPQ BX, $0x07
JLE sequenceDecs_decode_amd64_fill_2_end
SHLQ $0x08, DX
@@ -116,6 +120,10 @@ sequenceDecs_decode_amd64_fill_2_byte_by_byte:
ORQ AX, DX
JMP sequenceDecs_decode_amd64_fill_2_byte_by_byte
sequenceDecs_decode_amd64_fill_2_check_overread:
CMPQ BX, $0x40
JA error_overread
sequenceDecs_decode_amd64_fill_2_end:
// Update literal length
MOVQ DI, AX
@@ -293,9 +301,9 @@ sequenceDecs_decode_amd64_match_len_ofs_ok:
MOVQ R12, 152(AX)
MOVQ R13, 160(AX)
MOVQ br+8(FP), AX
MOVQ DX, 32(AX)
MOVB BL, 40(AX)
MOVQ SI, 24(AX)
MOVQ DX, 24(AX)
MOVB BL, 32(AX)
MOVQ SI, 8(AX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@@ -320,14 +328,19 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET
// Return with overread error
error_overread:
MOVQ $0x00000006, ret+24(FP)
RET
// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV
TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
MOVQ br+8(FP), AX
MOVQ 32(AX), DX
MOVBQZX 40(AX), BX
MOVQ 24(AX), SI
MOVQ (AX), AX
MOVQ br+8(FP), CX
MOVQ 24(CX), DX
MOVBQZX 32(CX), BX
MOVQ (CX), AX
MOVQ 8(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@@ -356,7 +369,7 @@ sequenceDecs_decode_56_amd64_main_loop:
sequenceDecs_decode_56_amd64_fill_byte_by_byte:
CMPQ SI, $0x00
JLE sequenceDecs_decode_56_amd64_fill_end
JLE sequenceDecs_decode_56_amd64_fill_check_overread
CMPQ BX, $0x07
JLE sequenceDecs_decode_56_amd64_fill_end
SHLQ $0x08, DX
@@ -367,6 +380,10 @@ sequenceDecs_decode_56_amd64_fill_byte_by_byte:
ORQ AX, DX
JMP sequenceDecs_decode_56_amd64_fill_byte_by_byte
sequenceDecs_decode_56_amd64_fill_check_overread:
CMPQ BX, $0x40
JA error_overread
sequenceDecs_decode_56_amd64_fill_end:
// Update offset
MOVQ R9, AX
@@ -586,9 +603,9 @@ sequenceDecs_decode_56_amd64_match_len_ofs_ok:
MOVQ R12, 152(AX)
MOVQ R13, 160(AX)
MOVQ br+8(FP), AX
MOVQ DX, 32(AX)
MOVB BL, 40(AX)
MOVQ SI, 24(AX)
MOVQ DX, 24(AX)
MOVB BL, 32(AX)
MOVQ SI, 8(AX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@@ -613,14 +630,19 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET
// Return with overread error
error_overread:
MOVQ $0x00000006, ret+24(FP)
RET
// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
MOVQ br+8(FP), CX
MOVQ 32(CX), AX
MOVBQZX 40(CX), DX
MOVQ 24(CX), BX
MOVQ (CX), CX
MOVQ br+8(FP), BX
MOVQ 24(BX), AX
MOVBQZX 32(BX), DX
MOVQ (BX), CX
MOVQ 8(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@@ -649,7 +671,7 @@ sequenceDecs_decode_bmi2_main_loop:
sequenceDecs_decode_bmi2_fill_byte_by_byte:
CMPQ BX, $0x00
JLE sequenceDecs_decode_bmi2_fill_end
JLE sequenceDecs_decode_bmi2_fill_check_overread
CMPQ DX, $0x07
JLE sequenceDecs_decode_bmi2_fill_end
SHLQ $0x08, AX
@@ -660,6 +682,10 @@ sequenceDecs_decode_bmi2_fill_byte_by_byte:
ORQ CX, AX
JMP sequenceDecs_decode_bmi2_fill_byte_by_byte
sequenceDecs_decode_bmi2_fill_check_overread:
CMPQ DX, $0x40
JA error_overread
sequenceDecs_decode_bmi2_fill_end:
// Update offset
MOVQ $0x00000808, CX
@@ -700,7 +726,7 @@ sequenceDecs_decode_bmi2_fill_end:
sequenceDecs_decode_bmi2_fill_2_byte_by_byte:
CMPQ BX, $0x00
JLE sequenceDecs_decode_bmi2_fill_2_end
JLE sequenceDecs_decode_bmi2_fill_2_check_overread
CMPQ DX, $0x07
JLE sequenceDecs_decode_bmi2_fill_2_end
SHLQ $0x08, AX
@@ -711,6 +737,10 @@ sequenceDecs_decode_bmi2_fill_2_byte_by_byte:
ORQ CX, AX
JMP sequenceDecs_decode_bmi2_fill_2_byte_by_byte
sequenceDecs_decode_bmi2_fill_2_check_overread:
CMPQ DX, $0x40
JA error_overread
sequenceDecs_decode_bmi2_fill_2_end:
// Update literal length
MOVQ $0x00000808, CX
@@ -862,9 +892,9 @@ sequenceDecs_decode_bmi2_match_len_ofs_ok:
MOVQ R11, 152(CX)
MOVQ R12, 160(CX)
MOVQ br+8(FP), CX
MOVQ AX, 32(CX)
MOVB DL, 40(CX)
MOVQ BX, 24(CX)
MOVQ AX, 24(CX)
MOVB DL, 32(CX)
MOVQ BX, 8(CX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@@ -889,14 +919,19 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET
// Return with overread error
error_overread:
MOVQ $0x00000006, ret+24(FP)
RET
// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
MOVQ br+8(FP), CX
MOVQ 32(CX), AX
MOVBQZX 40(CX), DX
MOVQ 24(CX), BX
MOVQ (CX), CX
MOVQ br+8(FP), BX
MOVQ 24(BX), AX
MOVBQZX 32(BX), DX
MOVQ (BX), CX
MOVQ 8(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@@ -925,7 +960,7 @@ sequenceDecs_decode_56_bmi2_main_loop:
sequenceDecs_decode_56_bmi2_fill_byte_by_byte:
CMPQ BX, $0x00
JLE sequenceDecs_decode_56_bmi2_fill_end
JLE sequenceDecs_decode_56_bmi2_fill_check_overread
CMPQ DX, $0x07
JLE sequenceDecs_decode_56_bmi2_fill_end
SHLQ $0x08, AX
@@ -936,6 +971,10 @@ sequenceDecs_decode_56_bmi2_fill_byte_by_byte:
ORQ CX, AX
JMP sequenceDecs_decode_56_bmi2_fill_byte_by_byte
sequenceDecs_decode_56_bmi2_fill_check_overread:
CMPQ DX, $0x40
JA error_overread
sequenceDecs_decode_56_bmi2_fill_end:
// Update offset
MOVQ $0x00000808, CX
@@ -1113,9 +1152,9 @@ sequenceDecs_decode_56_bmi2_match_len_ofs_ok:
MOVQ R11, 152(CX)
MOVQ R12, 160(CX)
MOVQ br+8(FP), CX
MOVQ AX, 32(CX)
MOVB DL, 40(CX)
MOVQ BX, 24(CX)
MOVQ AX, 24(CX)
MOVB DL, 32(CX)
MOVQ BX, 8(CX)
// Return success
MOVQ $0x00000000, ret+24(FP)
@@ -1140,6 +1179,11 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET
// Return with overread error
error_overread:
MOVQ $0x00000006, ret+24(FP)
RET
// func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
// Requires: SSE
TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9
@@ -1753,11 +1797,11 @@ empty_seqs:
// func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: CMOV, SSE
TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32
MOVQ br+8(FP), AX
MOVQ 32(AX), DX
MOVBQZX 40(AX), BX
MOVQ 24(AX), SI
MOVQ (AX), AX
MOVQ br+8(FP), CX
MOVQ 24(CX), DX
MOVBQZX 32(CX), BX
MOVQ (CX), AX
MOVQ 8(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@@ -1804,7 +1848,7 @@ sequenceDecs_decodeSync_amd64_main_loop:
sequenceDecs_decodeSync_amd64_fill_byte_by_byte:
CMPQ SI, $0x00
JLE sequenceDecs_decodeSync_amd64_fill_end
JLE sequenceDecs_decodeSync_amd64_fill_check_overread
CMPQ BX, $0x07
JLE sequenceDecs_decodeSync_amd64_fill_end
SHLQ $0x08, DX
@@ -1815,6 +1859,10 @@ sequenceDecs_decodeSync_amd64_fill_byte_by_byte:
ORQ AX, DX
JMP sequenceDecs_decodeSync_amd64_fill_byte_by_byte
sequenceDecs_decodeSync_amd64_fill_check_overread:
CMPQ BX, $0x40
JA error_overread
sequenceDecs_decodeSync_amd64_fill_end:
// Update offset
MOVQ R9, AX
@@ -1871,7 +1919,7 @@ sequenceDecs_decodeSync_amd64_ml_update_zero:
sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte:
CMPQ SI, $0x00
JLE sequenceDecs_decodeSync_amd64_fill_2_end
JLE sequenceDecs_decodeSync_amd64_fill_2_check_overread
CMPQ BX, $0x07
JLE sequenceDecs_decodeSync_amd64_fill_2_end
SHLQ $0x08, DX
@@ -1882,6 +1930,10 @@ sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte:
ORQ AX, DX
JMP sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte
sequenceDecs_decodeSync_amd64_fill_2_check_overread:
CMPQ BX, $0x40
JA error_overread
sequenceDecs_decodeSync_amd64_fill_2_end:
// Update literal length
MOVQ DI, AX
@@ -2243,9 +2295,9 @@ handle_loop:
loop_finished:
MOVQ br+8(FP), AX
MOVQ DX, 32(AX)
MOVB BL, 40(AX)
MOVQ SI, 24(AX)
MOVQ DX, 24(AX)
MOVB BL, 32(AX)
MOVQ SI, 8(AX)
// Update the context
MOVQ ctx+16(FP), AX
@@ -2291,6 +2343,11 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET
// Return with overread error
error_overread:
MOVQ $0x00000006, ret+24(FP)
RET
// Return with not enough output space error
error_not_enough_space:
MOVQ ctx+16(FP), AX
@@ -2305,11 +2362,11 @@ error_not_enough_space:
// func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: BMI, BMI2, CMOV, SSE
TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32
MOVQ br+8(FP), CX
MOVQ 32(CX), AX
MOVBQZX 40(CX), DX
MOVQ 24(CX), BX
MOVQ (CX), CX
MOVQ br+8(FP), BX
MOVQ 24(BX), AX
MOVBQZX 32(BX), DX
MOVQ (BX), CX
MOVQ 8(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@@ -2356,7 +2413,7 @@ sequenceDecs_decodeSync_bmi2_main_loop:
sequenceDecs_decodeSync_bmi2_fill_byte_by_byte:
CMPQ BX, $0x00
JLE sequenceDecs_decodeSync_bmi2_fill_end
JLE sequenceDecs_decodeSync_bmi2_fill_check_overread
CMPQ DX, $0x07
JLE sequenceDecs_decodeSync_bmi2_fill_end
SHLQ $0x08, AX
@@ -2367,6 +2424,10 @@ sequenceDecs_decodeSync_bmi2_fill_byte_by_byte:
ORQ CX, AX
JMP sequenceDecs_decodeSync_bmi2_fill_byte_by_byte
sequenceDecs_decodeSync_bmi2_fill_check_overread:
CMPQ DX, $0x40
JA error_overread
sequenceDecs_decodeSync_bmi2_fill_end:
// Update offset
MOVQ $0x00000808, CX
@@ -2407,7 +2468,7 @@ sequenceDecs_decodeSync_bmi2_fill_end:
sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte:
CMPQ BX, $0x00
JLE sequenceDecs_decodeSync_bmi2_fill_2_end
JLE sequenceDecs_decodeSync_bmi2_fill_2_check_overread
CMPQ DX, $0x07
JLE sequenceDecs_decodeSync_bmi2_fill_2_end
SHLQ $0x08, AX
@@ -2418,6 +2479,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte:
ORQ CX, AX
JMP sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte
sequenceDecs_decodeSync_bmi2_fill_2_check_overread:
CMPQ DX, $0x40
JA error_overread
sequenceDecs_decodeSync_bmi2_fill_2_end:
// Update literal length
MOVQ $0x00000808, CX
@@ -2753,9 +2818,9 @@ handle_loop:
loop_finished:
MOVQ br+8(FP), CX
MOVQ AX, 32(CX)
MOVB DL, 40(CX)
MOVQ BX, 24(CX)
MOVQ AX, 24(CX)
MOVB DL, 32(CX)
MOVQ BX, 8(CX)
// Update the context
MOVQ ctx+16(FP), AX
@@ -2801,6 +2866,11 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET
// Return with overread error
error_overread:
MOVQ $0x00000006, ret+24(FP)
RET
// Return with not enough output space error
error_not_enough_space:
MOVQ ctx+16(FP), AX
@@ -2815,11 +2885,11 @@ error_not_enough_space:
// func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: CMOV, SSE
TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32
MOVQ br+8(FP), AX
MOVQ 32(AX), DX
MOVBQZX 40(AX), BX
MOVQ 24(AX), SI
MOVQ (AX), AX
MOVQ br+8(FP), CX
MOVQ 24(CX), DX
MOVBQZX 32(CX), BX
MOVQ (CX), AX
MOVQ 8(CX), SI
ADDQ SI, AX
MOVQ AX, (SP)
MOVQ ctx+16(FP), AX
@@ -2866,7 +2936,7 @@ sequenceDecs_decodeSync_safe_amd64_main_loop:
sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte:
CMPQ SI, $0x00
JLE sequenceDecs_decodeSync_safe_amd64_fill_end
JLE sequenceDecs_decodeSync_safe_amd64_fill_check_overread
CMPQ BX, $0x07
JLE sequenceDecs_decodeSync_safe_amd64_fill_end
SHLQ $0x08, DX
@@ -2877,6 +2947,10 @@ sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte:
ORQ AX, DX
JMP sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte
sequenceDecs_decodeSync_safe_amd64_fill_check_overread:
CMPQ BX, $0x40
JA error_overread
sequenceDecs_decodeSync_safe_amd64_fill_end:
// Update offset
MOVQ R9, AX
@@ -2933,7 +3007,7 @@ sequenceDecs_decodeSync_safe_amd64_ml_update_zero:
sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte:
CMPQ SI, $0x00
JLE sequenceDecs_decodeSync_safe_amd64_fill_2_end
JLE sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread
CMPQ BX, $0x07
JLE sequenceDecs_decodeSync_safe_amd64_fill_2_end
SHLQ $0x08, DX
@@ -2944,6 +3018,10 @@ sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte:
ORQ AX, DX
JMP sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte
sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread:
CMPQ BX, $0x40
JA error_overread
sequenceDecs_decodeSync_safe_amd64_fill_2_end:
// Update literal length
MOVQ DI, AX
@@ -3407,9 +3485,9 @@ handle_loop:
loop_finished:
MOVQ br+8(FP), AX
MOVQ DX, 32(AX)
MOVB BL, 40(AX)
MOVQ SI, 24(AX)
MOVQ DX, 24(AX)
MOVB BL, 32(AX)
MOVQ SI, 8(AX)
// Update the context
MOVQ ctx+16(FP), AX
@@ -3455,6 +3533,11 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET
// Return with overread error
error_overread:
MOVQ $0x00000006, ret+24(FP)
RET
// Return with not enough output space error
error_not_enough_space:
MOVQ ctx+16(FP), AX
@@ -3469,11 +3552,11 @@ error_not_enough_space:
// func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: BMI, BMI2, CMOV, SSE
TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32
MOVQ br+8(FP), CX
MOVQ 32(CX), AX
MOVBQZX 40(CX), DX
MOVQ 24(CX), BX
MOVQ (CX), CX
MOVQ br+8(FP), BX
MOVQ 24(BX), AX
MOVBQZX 32(BX), DX
MOVQ (BX), CX
MOVQ 8(BX), BX
ADDQ BX, CX
MOVQ CX, (SP)
MOVQ ctx+16(FP), CX
@@ -3520,7 +3603,7 @@ sequenceDecs_decodeSync_safe_bmi2_main_loop:
sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte:
CMPQ BX, $0x00
JLE sequenceDecs_decodeSync_safe_bmi2_fill_end
JLE sequenceDecs_decodeSync_safe_bmi2_fill_check_overread
CMPQ DX, $0x07
JLE sequenceDecs_decodeSync_safe_bmi2_fill_end
SHLQ $0x08, AX
@@ -3531,6 +3614,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte:
ORQ CX, AX
JMP sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte
sequenceDecs_decodeSync_safe_bmi2_fill_check_overread:
CMPQ DX, $0x40
JA error_overread
sequenceDecs_decodeSync_safe_bmi2_fill_end:
// Update offset
MOVQ $0x00000808, CX
@@ -3571,7 +3658,7 @@ sequenceDecs_decodeSync_safe_bmi2_fill_end:
sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte:
CMPQ BX, $0x00
JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_end
JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread
CMPQ DX, $0x07
JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_end
SHLQ $0x08, AX
@@ -3582,6 +3669,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte:
ORQ CX, AX
JMP sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte
sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread:
CMPQ DX, $0x40
JA error_overread
sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
// Update literal length
MOVQ $0x00000808, CX
@@ -4019,9 +4110,9 @@ handle_loop:
loop_finished:
MOVQ br+8(FP), CX
MOVQ AX, 32(CX)
MOVB DL, 40(CX)
MOVQ BX, 24(CX)
MOVQ AX, 24(CX)
MOVB DL, 32(CX)
MOVQ BX, 8(CX)
// Update the context
MOVQ ctx+16(FP), AX
@@ -4067,6 +4158,11 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET
// Return with overread error
error_overread:
MOVQ $0x00000006, ret+24(FP)
RET
// Return with not enough output space error
error_not_enough_space:
MOVQ ctx+16(FP), AX

View File

@@ -29,7 +29,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
}
for i := range seqs {
var ll, mo, ml int
if br.off > 4+((maxOffsetBits+16+16)>>3) {
if len(br.in) > 4+((maxOffsetBits+16+16)>>3) {
// inlined function:
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)

View File

@@ -95,10 +95,9 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
var written int64
var readHeader bool
{
var header []byte
var n int
header, r.err = frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
header := frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
var n int
n, r.err = w.Write(header)
if r.err != nil {
return written, r.err

View File

@@ -9,7 +9,6 @@ import (
"errors"
"log"
"math"
"math/bits"
)
// enable debug printing
@@ -106,33 +105,12 @@ func printf(format string, a ...interface{}) {
}
}
// matchLen returns the maximum common prefix length of a and b.
// a must be the shortest of the two.
func matchLen(a, b []byte) (n int) {
for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
if diff != 0 {
return n + bits.TrailingZeros64(diff)>>3
}
n += 8
}
for i := range a {
if a[i] != b[i] {
break
}
n++
}
return n
}
func load3232(b []byte, i int32) uint32 {
return binary.LittleEndian.Uint32(b[i:])
return binary.LittleEndian.Uint32(b[:len(b):len(b)][i:])
}
func load6432(b []byte, i int32) uint64 {
return binary.LittleEndian.Uint64(b[i:])
return binary.LittleEndian.Uint64(b[:len(b):len(b)][i:])
}
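load3232 and load6432 now reslice through a full (three-index) slice expression, b[:len(b):len(b)], which clamps the capacity of the derived slice to its length so later reslicing cannot reach bytes past len(b). A minimal sketch of that effect (illustrative only, not part of the diff):
package main

import "fmt"

func main() {
	backing := make([]byte, 16)
	b := backing[:8] // len 8, cap 16: reslicing could still reach the 8 spare bytes

	full := b[:len(b):len(b)] // three-index slice: len 8, cap 8
	fmt.Println(len(b), cap(b), len(full), cap(full)) // 8 16 8 8

	// Any view derived from full stays within the original length.
	fmt.Println(cap(b[4:]), cap(full[4:])) // 12 4
}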
type byter interface {

View File

@@ -9,16 +9,19 @@ You can access the CPU information by accessing the shared CPU variable of the c
Package home: https://github.com/klauspost/cpuid
[![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2)
[![Build Status][3]][4]
[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master
[4]: https://travis-ci.org/klauspost/cpuid
[![Go](https://github.com/klauspost/cpuid/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/cpuid/actions/workflows/go.yml)
## installing
`go get -u github.com/klauspost/cpuid/v2` using modules.
Drop `v2` for others.
Installing binary:
`go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest`
Or download binaries from release page: https://github.com/klauspost/cpuid/releases
### Homebrew
For macOS/Linux users, you can install via [brew](https://brew.sh/)
@@ -279,7 +282,12 @@ Exit Code 1
| AMXINT8 | Tile computational operations on 8-bit integers |
| AMXFP16 | Tile computational operations on FP16 numbers |
| AMXTILE | Tile architecture |
| APX_F | Intel APX |
| AVX | AVX functions |
| AVX10 | If set the Intel AVX10 Converged Vector ISA is supported |
| AVX10_128 | If set indicates that AVX10 128-bit vector support is present |
| AVX10_256 | If set indicates that AVX10 256-bit vector support is present |
| AVX10_512 | If set indicates that AVX10 512-bit vector support is present |
| AVX2 | AVX2 functions |
| AVX512BF16 | AVX-512 BFLOAT16 Instructions |
| AVX512BITALG | AVX-512 Bit Algorithms |
@@ -302,6 +310,7 @@ Exit Code 1
| AVXSLOW | Indicates the CPU performs 2 128 bit operations instead of one |
| AVXVNNI | AVX (VEX encoded) VNNI neural network instructions |
| AVXVNNIINT8 | AVX-VNNI-INT8 instructions |
| BHI_CTRL | Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 |
| BMI1 | Bit Manipulation Instruction Set 1 |
| BMI2 | Bit Manipulation Instruction Set 2 |
| CETIBT | Intel CET Indirect Branch Tracking |
@@ -355,8 +364,11 @@ Exit Code 1
| IBS_OPFUSE | AMD: Indicates support for IbsOpFuse |
| IBS_PREVENTHOST | Disallowing IBS use by the host supported |
| IBS_ZEN4 | Fetch and Op IBS support IBS extensions added with Zen4 |
| IDPRED_CTRL | IPRED_DIS |
| INT_WBINVD | WBINVD/WBNOINVD are interruptible. |
| INVLPGB | NVLPGB and TLBSYNC instruction supported |
| KEYLOCKER | Key locker |
| KEYLOCKERW | Key locker wide |
| LAHF | LAHF/SAHF in long mode |
| LAM | If set, CPU supports Linear Address Masking |
| LBRVIRT | LBR virtualization |
@@ -374,6 +386,7 @@ Exit Code 1
| MPX | Intel MPX (Memory Protection Extensions) |
| MOVU | MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD |
| MSRIRC | Instruction Retired Counter MSR available |
| MSRLIST | Read/Write List of Model Specific Registers |
| MSR_PAGEFLUSH | Page Flush MSR available |
| NRIPS | Indicates support for NRIP save on VMEXIT |
| NX | NX (No-Execute) bit |
@@ -381,12 +394,13 @@ Exit Code 1
| PCONFIG | PCONFIG for Intel Multi-Key Total Memory Encryption |
| POPCNT | POPCNT instruction |
| PPIN | AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled |
| PREFETCHI | PREFETCHIT0/1 instructions |
| PSFD | AMD: Predictive Store Forward Disable |
| PREFETCHI | PREFETCHIT0/1 instructions |
| PSFD | Predictive Store Forward Disable |
| RDPRU | RDPRU instruction supported |
| RDRAND | RDRAND instruction is available |
| RDSEED | RDSEED instruction is available |
| RDTSCP | RDTSCP Instruction |
| RRSBA_CTRL | Restricted RSB Alternate |
| RTM | Restricted Transactional Memory |
| RTM_ALWAYS_ABORT | Indicates that the loaded microcode is forcing RTM abort. |
| SERIALIZE | Serialize Instruction Execution |
@@ -425,6 +439,7 @@ Exit Code 1
| SYSCALL | System-Call Extension (SCE): SYSCALL and SYSRET instructions. |
| SYSEE | SYSENTER and SYSEXIT instructions |
| TBM | AMD Trailing Bit Manipulation |
| TDX_GUEST | Intel Trust Domain Extensions Guest |
| TLB_FLUSH_NESTED | AMD: Flushing includes all the nested translations for guest translations |
| TME | Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. |
| TOPEXT | TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. |
@@ -439,6 +454,7 @@ Exit Code 1
| VTE | AMD Virtual Transparent Encryption supported |
| WAITPKG | TPAUSE, UMONITOR, UMWAIT |
| WBNOINVD | Write Back and Do Not Invalidate Cache |
| WRMSRNS | Non-Serializing Write to Model Specific Register |
| X87 | FPU |
| XGETBV1 | Supports XGETBV with ECX = 1 |
| XOP | Bulldozer XOP functions |

View File

@@ -67,182 +67,200 @@ const (
// Keep index -1 as unknown
UNKNOWN = -1
// Add features
ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
AESNI // Advanced Encryption Standard New Instructions
AMD3DNOW // AMD 3DNOW
AMD3DNOWEXT // AMD 3DNowExt
AMXBF16 // Tile computational operations on BFLOAT16 numbers
AMXFP16 // Tile computational operations on FP16 numbers
AMXINT8 // Tile computational operations on 8-bit integers
AMXTILE // Tile architecture
AVX // AVX functions
AVX2 // AVX2 functions
AVX512BF16 // AVX-512 BFLOAT16 Instructions
AVX512BITALG // AVX-512 Bit Algorithms
AVX512BW // AVX-512 Byte and Word Instructions
AVX512CD // AVX-512 Conflict Detection Instructions
AVX512DQ // AVX-512 Doubleword and Quadword Instructions
AVX512ER // AVX-512 Exponential and Reciprocal Instructions
AVX512F // AVX-512 Foundation
AVX512FP16 // AVX-512 FP16 Instructions
AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions
AVX512PF // AVX-512 Prefetch Instructions
AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions
AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2
AVX512VL // AVX-512 Vector Length Extensions
AVX512VNNI // AVX-512 Vector Neural Network Instructions
AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
AVXIFMA // AVX-IFMA instructions
AVXNECONVERT // AVX-NE-CONVERT instructions
AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one
AVXVNNI // AVX (VEX encoded) VNNI neural network instructions
AVXVNNIINT8 // AVX-VNNI-INT8 instructions
BMI1 // Bit Manipulation Instruction Set 1
BMI2 // Bit Manipulation Instruction Set 2
CETIBT // Intel CET Indirect Branch Tracking
CETSS // Intel CET Shadow Stack
CLDEMOTE // Cache Line Demote
CLMUL // Carry-less Multiplication
CLZERO // CLZERO instruction supported
CMOV // i686 CMOV
CMPCCXADD // CMPCCXADD instructions
CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB
CMPXCHG8 // CMPXCHG8 instruction
CPBOOST // Core Performance Boost
CPPC // AMD: Collaborative Processor Performance Control
CX16 // CMPXCHG16B Instruction
EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
ENQCMD // Enqueue Command
ERMS // Enhanced REP MOVSB/STOSB
F16C // Half-precision floating-point conversion
FLUSH_L1D // Flush L1D cache
FMA3 // Intel FMA 3. Does not imply AVX.
FMA4 // Bulldozer FMA4 functions
FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
FSRM // Fast Short Rep Mov
FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9
FXSROPT // FXSAVE/FXRSTOR optimizations
GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
HLE // Hardware Lock Elision
HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
HTT // Hyperthreading (enabled)
HWA // Hardware assert supported. Indicates support for MSRC001_10
HYBRID_CPU // This part has CPUs of more than one type.
HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors
IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel)
IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR
IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
IBRS // AMD: Indirect Branch Restricted Speculation
IBRS_PREFERRED // AMD: IBRS is preferred over software solution
IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection
IBS // Instruction Based Sampling (AMD)
IBSBRNTRGT // Instruction Based Sampling Feature (AMD)
IBSFETCHSAM // Instruction Based Sampling Feature (AMD)
IBSFFV // Instruction Based Sampling Feature (AMD)
IBSOPCNT // Instruction Based Sampling Feature (AMD)
IBSOPCNTEXT // Instruction Based Sampling Feature (AMD)
IBSOPSAM // Instruction Based Sampling Feature (AMD)
IBSRDWROPCNT // Instruction Based Sampling Feature (AMD)
IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD)
IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported
IBS_OPDATA4 // AMD: IBS op data 4 MSR supported
IBS_OPFUSE // AMD: Indicates support for IbsOpFuse
IBS_PREVENTHOST // Disallowing IBS use by the host supported
IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4
INT_WBINVD // WBINVD/WBNOINVD are interruptible.
INVLPGB // NVLPGB and TLBSYNC instruction supported
LAHF // LAHF/SAHF in long mode
LAM // If set, CPU supports Linear Address Masking
LBRVIRT // LBR virtualization
LZCNT // LZCNT instruction
MCAOVERFLOW // MCA overflow recovery support.
MCDT_NO // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it.
MCOMMIT // MCOMMIT instruction supported
MD_CLEAR // VERW clears CPU buffers
MMX // standard MMX
MMXEXT // SSE integer functions or AMD MMX ext
MOVBE // MOVBE instruction (big-endian)
MOVDIR64B // Move 64 Bytes as Direct Store
MOVDIRI // Move Doubleword as Direct Store
MOVSB_ZL // Fast Zero-Length MOVSB
MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
MPX // Intel MPX (Memory Protection Extensions)
MSRIRC // Instruction Retired Counter MSR available
MSR_PAGEFLUSH // Page Flush MSR available
NRIPS // Indicates support for NRIP save on VMEXIT
NX // NX (No-Execute) bit
OSXSAVE // XSAVE enabled by OS
PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption
POPCNT // POPCNT instruction
PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
PREFETCHI // PREFETCHIT0/1 instructions
PSFD // AMD: Predictive Store Forward Disable
RDPRU // RDPRU instruction supported
RDRAND // RDRAND instruction is available
RDSEED // RDSEED instruction is available
RDTSCP // RDTSCP Instruction
RTM // Restricted Transactional Memory
RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort.
SERIALIZE // Serialize Instruction Execution
SEV // AMD Secure Encrypted Virtualization supported
SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host
SEV_ALTERNATIVE // AMD SEV Alternate Injection supported
SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests
SEV_ES // AMD SEV Encrypted State supported
SEV_RESTRICTED // AMD SEV Restricted Injection supported
SEV_SNP // AMD SEV Secure Nested Paging supported
SGX // Software Guard Extensions
SGXLC // Software Guard Extensions Launch Control
SHA // Intel SHA Extensions
SME // AMD Secure Memory Encryption supported
SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced
SPEC_CTRL_SSBD // Speculative Store Bypass Disable
SRBDS_CTRL // SRBDS mitigation MSR available
SSE // SSE functions
SSE2 // P4 SSE functions
SSE3 // Prescott SSE3 functions
SSE4 // Penryn SSE4.1 functions
SSE42 // Nehalem SSE4.2 functions
SSE4A // AMD Barcelona microarchitecture SSE4a instructions
SSSE3 // Conroe SSSE3 functions
STIBP // Single Thread Indirect Branch Predictors
STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
STOSB_SHORT // Fast short STOSB
SUCCOR // Software uncorrectable error containment and recovery capability.
SVM // AMD Secure Virtual Machine
SVMDA // Indicates support for the SVM decode assists.
SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
SVML // AMD SVM lock. Indicates support for SVM-Lock.
SVMNP // AMD SVM nested paging
SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter
SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
SYSEE // SYSENTER and SYSEXIT instructions
TBM // AMD Trailing Bit Manipulation
TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations
TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
TSXLDTRK // Intel TSX Suspend Load Address Tracking
VAES // Vector AES. AVX(512) versions requires additional checks.
VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits.
VMPL // AMD VM Permission Levels supported
VMSA_REGPROT // AMD VMSA Register Protection supported
VMX // Virtual Machine Extensions
VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
VTE // AMD Virtual Transparent Encryption supported
WAITPKG // TPAUSE, UMONITOR, UMWAIT
WBNOINVD // Write Back and Do Not Invalidate Cache
X87 // FPU
XGETBV1 // Supports XGETBV with ECX = 1
XOP // Bulldozer XOP functions
XSAVE // XSAVE, XRESTOR, XSETBV, XGETBV
XSAVEC // Supports XSAVEC and the compacted form of XRSTOR.
XSAVEOPT // XSAVEOPT available
XSAVES // Supports XSAVES/XRSTORS and IA32_XSS
// x86 features
ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
AESNI // Advanced Encryption Standard New Instructions
AMD3DNOW // AMD 3DNOW
AMD3DNOWEXT // AMD 3DNowExt
AMXBF16 // Tile computational operations on BFLOAT16 numbers
AMXFP16 // Tile computational operations on FP16 numbers
AMXINT8 // Tile computational operations on 8-bit integers
AMXTILE // Tile architecture
APX_F // Intel APX
AVX // AVX functions
AVX10 // If set the Intel AVX10 Converged Vector ISA is supported
AVX10_128 // If set indicates that AVX10 128-bit vector support is present
AVX10_256 // If set indicates that AVX10 256-bit vector support is present
AVX10_512 // If set indicates that AVX10 512-bit vector support is present
AVX2 // AVX2 functions
AVX512BF16 // AVX-512 BFLOAT16 Instructions
AVX512BITALG // AVX-512 Bit Algorithms
AVX512BW // AVX-512 Byte and Word Instructions
AVX512CD // AVX-512 Conflict Detection Instructions
AVX512DQ // AVX-512 Doubleword and Quadword Instructions
AVX512ER // AVX-512 Exponential and Reciprocal Instructions
AVX512F // AVX-512 Foundation
AVX512FP16 // AVX-512 FP16 Instructions
AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions
AVX512PF // AVX-512 Prefetch Instructions
AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions
AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2
AVX512VL // AVX-512 Vector Length Extensions
AVX512VNNI // AVX-512 Vector Neural Network Instructions
AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
AVXIFMA // AVX-IFMA instructions
AVXNECONVERT // AVX-NE-CONVERT instructions
AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one
AVXVNNI // AVX (VEX encoded) VNNI neural network instructions
AVXVNNIINT8 // AVX-VNNI-INT8 instructions
BHI_CTRL // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598
BMI1 // Bit Manipulation Instruction Set 1
BMI2 // Bit Manipulation Instruction Set 2
CETIBT // Intel CET Indirect Branch Tracking
CETSS // Intel CET Shadow Stack
CLDEMOTE // Cache Line Demote
CLMUL // Carry-less Multiplication
CLZERO // CLZERO instruction supported
CMOV // i686 CMOV
CMPCCXADD // CMPCCXADD instructions
CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB
CMPXCHG8 // CMPXCHG8 instruction
CPBOOST // Core Performance Boost
CPPC // AMD: Collaborative Processor Performance Control
CX16 // CMPXCHG16B Instruction
EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
ENQCMD // Enqueue Command
ERMS // Enhanced REP MOVSB/STOSB
F16C // Half-precision floating-point conversion
FLUSH_L1D // Flush L1D cache
FMA3 // Intel FMA 3. Does not imply AVX.
FMA4 // Bulldozer FMA4 functions
FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
FSRM // Fast Short Rep Mov
FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9
FXSROPT // FXSAVE/FXRSTOR optimizations
GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
HLE // Hardware Lock Elision
HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
HTT // Hyperthreading (enabled)
HWA // Hardware assert supported. Indicates support for MSRC001_10
HYBRID_CPU // This part has CPUs of more than one type.
HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors
IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel)
IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR
IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
IBPB_BRTYPE // Indicates that MSR 49h (PRED_CMD) bit 0 (IBPB) flushes all branch type predictions from the CPU branch predictor
IBRS // AMD: Indirect Branch Restricted Speculation
IBRS_PREFERRED // AMD: IBRS is preferred over software solution
IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection
IBS // Instruction Based Sampling (AMD)
IBSBRNTRGT // Instruction Based Sampling Feature (AMD)
IBSFETCHSAM // Instruction Based Sampling Feature (AMD)
IBSFFV // Instruction Based Sampling Feature (AMD)
IBSOPCNT // Instruction Based Sampling Feature (AMD)
IBSOPCNTEXT // Instruction Based Sampling Feature (AMD)
IBSOPSAM // Instruction Based Sampling Feature (AMD)
IBSRDWROPCNT // Instruction Based Sampling Feature (AMD)
IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD)
IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported
IBS_OPDATA4 // AMD: IBS op data 4 MSR supported
IBS_OPFUSE // AMD: Indicates support for IbsOpFuse
IBS_PREVENTHOST // Disallowing IBS use by the host supported
IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4
IDPRED_CTRL // IPRED_DIS
INT_WBINVD // WBINVD/WBNOINVD are interruptible.
INVLPGB // NVLPGB and TLBSYNC instruction supported
KEYLOCKER // Key locker
KEYLOCKERW // Key locker wide
LAHF // LAHF/SAHF in long mode
LAM // If set, CPU supports Linear Address Masking
LBRVIRT // LBR virtualization
LZCNT // LZCNT instruction
MCAOVERFLOW // MCA overflow recovery support.
MCDT_NO // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it.
MCOMMIT // MCOMMIT instruction supported
MD_CLEAR // VERW clears CPU buffers
MMX // standard MMX
MMXEXT // SSE integer functions or AMD MMX ext
MOVBE // MOVBE instruction (big-endian)
MOVDIR64B // Move 64 Bytes as Direct Store
MOVDIRI // Move Doubleword as Direct Store
MOVSB_ZL // Fast Zero-Length MOVSB
MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
MPX // Intel MPX (Memory Protection Extensions)
MSRIRC // Instruction Retired Counter MSR available
MSRLIST // Read/Write List of Model Specific Registers
MSR_PAGEFLUSH // Page Flush MSR available
NRIPS // Indicates support for NRIP save on VMEXIT
NX // NX (No-Execute) bit
OSXSAVE // XSAVE enabled by OS
PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption
POPCNT // POPCNT instruction
PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
PREFETCHI // PREFETCHIT0/1 instructions
PSFD // Predictive Store Forward Disable
RDPRU // RDPRU instruction supported
RDRAND // RDRAND instruction is available
RDSEED // RDSEED instruction is available
RDTSCP // RDTSCP Instruction
RRSBA_CTRL // Restricted RSB Alternate
RTM // Restricted Transactional Memory
RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort.
SBPB // Indicates support for the Selective Branch Predictor Barrier
SERIALIZE // Serialize Instruction Execution
SEV // AMD Secure Encrypted Virtualization supported
SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host
SEV_ALTERNATIVE // AMD SEV Alternate Injection supported
SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests
SEV_ES // AMD SEV Encrypted State supported
SEV_RESTRICTED // AMD SEV Restricted Injection supported
SEV_SNP // AMD SEV Secure Nested Paging supported
SGX // Software Guard Extensions
SGXLC // Software Guard Extensions Launch Control
SHA // Intel SHA Extensions
SME // AMD Secure Memory Encryption supported
SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced
SPEC_CTRL_SSBD // Speculative Store Bypass Disable
SRBDS_CTRL // SRBDS mitigation MSR available
SRSO_MSR_FIX // Indicates that software may use MSR BP_CFG[BpSpecReduce] to mitigate SRSO.
SRSO_NO // Indicates the CPU is not subject to the SRSO vulnerability
SRSO_USER_KERNEL_NO // Indicates the CPU is not subject to the SRSO vulnerability across user/kernel boundaries
SSE // SSE functions
SSE2 // P4 SSE functions
SSE3 // Prescott SSE3 functions
SSE4 // Penryn SSE4.1 functions
SSE42 // Nehalem SSE4.2 functions
SSE4A // AMD Barcelona microarchitecture SSE4a instructions
SSSE3 // Conroe SSSE3 functions
STIBP // Single Thread Indirect Branch Predictors
STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
STOSB_SHORT // Fast short STOSB
SUCCOR // Software uncorrectable error containment and recovery capability.
SVM // AMD Secure Virtual Machine
SVMDA // Indicates support for the SVM decode assists.
SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
SVML // AMD SVM lock. Indicates support for SVM-Lock.
SVMNP // AMD SVM nested paging
SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter
SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
SYSEE // SYSENTER and SYSEXIT instructions
TBM // AMD Trailing Bit Manipulation
TDX_GUEST // Intel Trust Domain Extensions Guest
TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations
TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
TSXLDTRK // Intel TSX Suspend Load Address Tracking
VAES // Vector AES. AVX(512) versions requires additional checks.
VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits.
VMPL // AMD VM Permission Levels supported
VMSA_REGPROT // AMD VMSA Register Protection supported
VMX // Virtual Machine Extensions
VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
VTE // AMD Virtual Transparent Encryption supported
WAITPKG // TPAUSE, UMONITOR, UMWAIT
WBNOINVD // Write Back and Do Not Invalidate Cache
WRMSRNS // Non-Serializing Write to Model Specific Register
X87 // FPU
XGETBV1 // Supports XGETBV with ECX = 1
XOP // Bulldozer XOP functions
XSAVE // XSAVE, XRSTOR, XSETBV, XGETBV
XSAVEC // Supports XSAVEC and the compacted form of XRSTOR.
XSAVEOPT // XSAVEOPT available
XSAVES // Supports XSAVES/XRSTORS and IA32_XSS
// ARM features:
AESARM // AES instructions
@@ -296,9 +314,11 @@ type CPUInfo struct {
L2 int // L2 Cache (per core or shared). Will be -1 if undetected
L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
}
SGX SGXSupport
maxFunc uint32
maxExFunc uint32
SGX SGXSupport
AMDMemEncryption AMDMemEncryptionSupport
AVX10Level uint8
maxFunc uint32
maxExFunc uint32
}
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
@@ -1065,6 +1085,32 @@ func hasSGX(available, lc bool) (rval SGXSupport) {
return
}
type AMDMemEncryptionSupport struct {
Available bool // Memory-encryption capability reported by CPUID leaf 0x8000001f
CBitPossition uint32 // Location of the C-bit in the page table entry (EBX[5:0])
NumVMPL uint32 // Number of VM permission levels supported (EBX[15:12])
PhysAddrReduction uint32 // Physical address bit reduction when memory encryption is enabled (EBX[11:6])
NumEntryptedGuests uint32 // Number of simultaneously encrypted guests supported (ECX)
MinSevNoEsAsid uint32 // Minimum ASID for an SEV-enabled, SEV-ES-disabled guest (EDX)
}
func hasAMDMemEncryption(available bool) (rval AMDMemEncryptionSupport) {
rval.Available = available
if !available {
return
}
_, b, c, d := cpuidex(0x8000001f, 0)
rval.CBitPossition = b & 0x3f
rval.PhysAddrReduction = (b >> 6) & 0x3F
rval.NumVMPL = (b >> 12) & 0xf
rval.NumEntryptedGuests = c
rval.MinSevNoEsAsid = d
return
}
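For orientation, a minimal usage sketch of the new capability struct once it is wired into the package-level CPU value (assuming the vendored module is imported as github.com/klauspost/cpuid/v2; field names, including their upstream spellings, are copied from the struct above):

package main

import (
	"fmt"

	"github.com/klauspost/cpuid/v2"
)

func main() {
	// CPU is populated at package init on supported platforms.
	enc := cpuid.CPU.AMDMemEncryption
	if !enc.Available {
		fmt.Println("no SME/SEV capability reported by CPUID leaf 0x8000001f")
		return
	}
	// Values decoded above from CPUID.8000001F: EBX[5:0], EBX[11:6], EBX[15:12], ECX, EDX.
	fmt.Println("C-bit position:", enc.CBitPossition)
	fmt.Println("physical address bit reduction:", enc.PhysAddrReduction)
	fmt.Println("VM permission levels:", enc.NumVMPL)
	fmt.Println("simultaneously encrypted guests:", enc.NumEntryptedGuests)
	fmt.Println("min ASID for SEV without SEV-ES:", enc.MinSevNoEsAsid)
}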
func support() flagSet {
var fs flagSet
mfi := maxFunctionID()
@@ -1159,6 +1205,7 @@ func support() flagSet {
fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
fs.setIf(ecx&(1<<13) != 0, TME)
fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
fs.setIf(ecx&(1<<23) != 0, KEYLOCKER)
fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
fs.setIf(ecx&(1<<29) != 0, ENQCMD)
@@ -1181,13 +1228,8 @@ func support() flagSet {
fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP)
fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD)
// CPUID.(EAX=7, ECX=1).EDX
fs.setIf(edx&(1<<4) != 0, AVXVNNIINT8)
fs.setIf(edx&(1<<5) != 0, AVXNECONVERT)
fs.setIf(edx&(1<<14) != 0, PREFETCHI)
// CPUID.(EAX=7, ECX=1).EAX
eax1, _, _, _ := cpuidex(7, 1)
eax1, _, _, edx1 := cpuidex(7, 1)
fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
fs.setIf(eax1&(1<<7) != 0, CMPCCXADD)
fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
@@ -1197,6 +1239,13 @@ func support() flagSet {
fs.setIf(eax1&(1<<23) != 0, AVXIFMA)
fs.setIf(eax1&(1<<26) != 0, LAM)
// CPUID.(EAX=7, ECX=1).EDX
fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8)
fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT)
fs.setIf(edx1&(1<<14) != 0, PREFETCHI)
fs.setIf(edx1&(1<<19) != 0, AVX10)
fs.setIf(edx1&(1<<21) != 0, APX_F)
// Only detect AVX-512 features if XGETBV is supported
if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
// Check for OS support
@@ -1232,13 +1281,33 @@ func support() flagSet {
fs.setIf(edx&(1<<25) != 0, AMXINT8)
// eax1 = CPUID.(EAX=7, ECX=1).EAX
fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
fs.setIf(eax1&(1<<19) != 0, WRMSRNS)
fs.setIf(eax1&(1<<21) != 0, AMXFP16)
fs.setIf(eax1&(1<<27) != 0, MSRLIST)
}
}
// CPUID.(EAX=7, ECX=2)
_, _, _, edx = cpuidex(7, 2)
fs.setIf(edx&(1<<0) != 0, PSFD)
fs.setIf(edx&(1<<1) != 0, IDPRED_CTRL)
fs.setIf(edx&(1<<2) != 0, RRSBA_CTRL)
fs.setIf(edx&(1<<4) != 0, BHI_CTRL)
fs.setIf(edx&(1<<5) != 0, MCDT_NO)
// Add keylocker features.
if fs.inSet(KEYLOCKER) && mfi >= 0x19 {
_, ebx, _, _ := cpuidex(0x19, 0)
fs.setIf(ebx&5 == 5, KEYLOCKERW) // Bit 0 and 2 (1+4)
}
// Add AVX10 features.
if fs.inSet(AVX10) && mfi >= 0x24 {
_, ebx, _, _ := cpuidex(0x24, 0)
fs.setIf(ebx&(1<<16) != 0, AVX10_128)
fs.setIf(ebx&(1<<17) != 0, AVX10_256)
fs.setIf(ebx&(1<<18) != 0, AVX10_512)
}
}
// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
@@ -1381,9 +1450,47 @@ func support() flagSet {
fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
}
if maxExtendedFunction() >= 0x80000021 && vend == AMD {
a, _, _, _ := cpuid(0x80000021)
fs.setIf((a>>31)&1 == 1, SRSO_MSR_FIX)
fs.setIf((a>>30)&1 == 1, SRSO_USER_KERNEL_NO)
fs.setIf((a>>29)&1 == 1, SRSO_NO)
fs.setIf((a>>28)&1 == 1, IBPB_BRTYPE)
fs.setIf((a>>27)&1 == 1, SBPB)
}
if mfi >= 0x20 {
// Microsoft has decided to purposefully hide the information
// of the guest TEE when VMs are being created using Hyper-V.
//
// This leads us to check for the Hyper-V cpuid features
// (0x4000000C), and then for the `ebx` value set.
//
// For Intel TDX, `ebx` is set to `0xbe3`; the isolation type in its low
// bits (3) is the part we are mostly interested in, according to:
// https://github.com/torvalds/linux/blob/d2f51b3516dade79269ff45eae2a7668ae711b25/arch/x86/include/asm/hyperv-tlfs.h#L169-L174
_, ebx, _, _ := cpuid(0x4000000C)
fs.setIf(ebx == 0xbe3, TDX_GUEST)
}
if mfi >= 0x21 {
// Intel Trust Domain Extensions guests have their own cpuid leaf (0x21).
_, ebx, ecx, edx := cpuid(0x21)
identity := string(valAsString(ebx, edx, ecx))
fs.setIf(identity == "IntelTDX    ", TDX_GUEST) // 12-byte identity string, "IntelTDX" padded with spaces
}
return fs
}
func (c *CPUInfo) supportAVX10() uint8 {
if c.maxFunc >= 0x24 && c.featureSet.inSet(AVX10) {
_, ebx, _, _ := cpuidex(0x24, 0)
return uint8(ebx)
}
return 0
}
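A rough sketch of how the AVX10Level byte returned here (the AVX10 version number, CPUID leaf 0x24 EBX bits 7:0) could be combined with the width flags; the helper name is hypothetical and not part of the package:

package avxcheck

import "github.com/klauspost/cpuid/v2"

// HasAVX10_512V2 reports whether the CPU advertises at least AVX10 version 2
// with 512-bit vector support. Illustrative helper built on the public API.
func HasAVX10_512V2() bool {
	return cpuid.CPU.Supports(cpuid.AVX10, cpuid.AVX10_512) && cpuid.CPU.AVX10Level >= 2
}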
func valAsString(values ...uint32) []byte {
r := make([]byte, 4*len(values))
for i, v := range values {

View File

@@ -27,10 +27,12 @@ func addInfo(c *CPUInfo, safe bool) {
c.Family, c.Model, c.Stepping = familyModel()
c.featureSet = support()
c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
c.AMDMemEncryption = hasAMDMemEncryption(c.featureSet.inSet(SME) || c.featureSet.inSet(SEV))
c.ThreadsPerCore = threadsPerCore()
c.LogicalCores = logicalCores()
c.PhysicalCores = physicalCores()
c.VendorID, c.VendorString = vendorID()
c.AVX10Level = c.supportAVX10()
c.cacheSize()
c.frequencies()
}
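Since addInfo is where these detections get wired into the package-level CPU value, a short sketch of how downstream code might query the newly added TEE-related flags (public Has/FeatureSet API only; the printed wording is illustrative):

package main

import (
	"fmt"

	"github.com/klauspost/cpuid/v2"
)

func main() {
	// Detection runs at package init, so CPU is ready to query.
	if cpuid.CPU.Has(cpuid.TDX_GUEST) {
		fmt.Println("running inside an Intel TDX guest")
	}
	if cpuid.CPU.Has(cpuid.SEV_SNP) {
		fmt.Println("AMD SEV-SNP supported")
	}
	fmt.Println("detected features:", cpuid.CPU.FeatureSet())
}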

View File

@@ -16,204 +16,222 @@ func _() {
_ = x[AMXFP16-6]
_ = x[AMXINT8-7]
_ = x[AMXTILE-8]
_ = x[AVX-9]
_ = x[AVX2-10]
_ = x[AVX512BF16-11]
_ = x[AVX512BITALG-12]
_ = x[AVX512BW-13]
_ = x[AVX512CD-14]
_ = x[AVX512DQ-15]
_ = x[AVX512ER-16]
_ = x[AVX512F-17]
_ = x[AVX512FP16-18]
_ = x[AVX512IFMA-19]
_ = x[AVX512PF-20]
_ = x[AVX512VBMI-21]
_ = x[AVX512VBMI2-22]
_ = x[AVX512VL-23]
_ = x[AVX512VNNI-24]
_ = x[AVX512VP2INTERSECT-25]
_ = x[AVX512VPOPCNTDQ-26]
_ = x[AVXIFMA-27]
_ = x[AVXNECONVERT-28]
_ = x[AVXSLOW-29]
_ = x[AVXVNNI-30]
_ = x[AVXVNNIINT8-31]
_ = x[BMI1-32]
_ = x[BMI2-33]
_ = x[CETIBT-34]
_ = x[CETSS-35]
_ = x[CLDEMOTE-36]
_ = x[CLMUL-37]
_ = x[CLZERO-38]
_ = x[CMOV-39]
_ = x[CMPCCXADD-40]
_ = x[CMPSB_SCADBS_SHORT-41]
_ = x[CMPXCHG8-42]
_ = x[CPBOOST-43]
_ = x[CPPC-44]
_ = x[CX16-45]
_ = x[EFER_LMSLE_UNS-46]
_ = x[ENQCMD-47]
_ = x[ERMS-48]
_ = x[F16C-49]
_ = x[FLUSH_L1D-50]
_ = x[FMA3-51]
_ = x[FMA4-52]
_ = x[FP128-53]
_ = x[FP256-54]
_ = x[FSRM-55]
_ = x[FXSR-56]
_ = x[FXSROPT-57]
_ = x[GFNI-58]
_ = x[HLE-59]
_ = x[HRESET-60]
_ = x[HTT-61]
_ = x[HWA-62]
_ = x[HYBRID_CPU-63]
_ = x[HYPERVISOR-64]
_ = x[IA32_ARCH_CAP-65]
_ = x[IA32_CORE_CAP-66]
_ = x[IBPB-67]
_ = x[IBRS-68]
_ = x[IBRS_PREFERRED-69]
_ = x[IBRS_PROVIDES_SMP-70]
_ = x[IBS-71]
_ = x[IBSBRNTRGT-72]
_ = x[IBSFETCHSAM-73]
_ = x[IBSFFV-74]
_ = x[IBSOPCNT-75]
_ = x[IBSOPCNTEXT-76]
_ = x[IBSOPSAM-77]
_ = x[IBSRDWROPCNT-78]
_ = x[IBSRIPINVALIDCHK-79]
_ = x[IBS_FETCH_CTLX-80]
_ = x[IBS_OPDATA4-81]
_ = x[IBS_OPFUSE-82]
_ = x[IBS_PREVENTHOST-83]
_ = x[IBS_ZEN4-84]
_ = x[INT_WBINVD-85]
_ = x[INVLPGB-86]
_ = x[LAHF-87]
_ = x[LAM-88]
_ = x[LBRVIRT-89]
_ = x[LZCNT-90]
_ = x[MCAOVERFLOW-91]
_ = x[MCDT_NO-92]
_ = x[MCOMMIT-93]
_ = x[MD_CLEAR-94]
_ = x[MMX-95]
_ = x[MMXEXT-96]
_ = x[MOVBE-97]
_ = x[MOVDIR64B-98]
_ = x[MOVDIRI-99]
_ = x[MOVSB_ZL-100]
_ = x[MOVU-101]
_ = x[MPX-102]
_ = x[MSRIRC-103]
_ = x[MSR_PAGEFLUSH-104]
_ = x[NRIPS-105]
_ = x[NX-106]
_ = x[OSXSAVE-107]
_ = x[PCONFIG-108]
_ = x[POPCNT-109]
_ = x[PPIN-110]
_ = x[PREFETCHI-111]
_ = x[PSFD-112]
_ = x[RDPRU-113]
_ = x[RDRAND-114]
_ = x[RDSEED-115]
_ = x[RDTSCP-116]
_ = x[RTM-117]
_ = x[RTM_ALWAYS_ABORT-118]
_ = x[SERIALIZE-119]
_ = x[SEV-120]
_ = x[SEV_64BIT-121]
_ = x[SEV_ALTERNATIVE-122]
_ = x[SEV_DEBUGSWAP-123]
_ = x[SEV_ES-124]
_ = x[SEV_RESTRICTED-125]
_ = x[SEV_SNP-126]
_ = x[SGX-127]
_ = x[SGXLC-128]
_ = x[SHA-129]
_ = x[SME-130]
_ = x[SME_COHERENT-131]
_ = x[SPEC_CTRL_SSBD-132]
_ = x[SRBDS_CTRL-133]
_ = x[SSE-134]
_ = x[SSE2-135]
_ = x[SSE3-136]
_ = x[SSE4-137]
_ = x[SSE42-138]
_ = x[SSE4A-139]
_ = x[SSSE3-140]
_ = x[STIBP-141]
_ = x[STIBP_ALWAYSON-142]
_ = x[STOSB_SHORT-143]
_ = x[SUCCOR-144]
_ = x[SVM-145]
_ = x[SVMDA-146]
_ = x[SVMFBASID-147]
_ = x[SVML-148]
_ = x[SVMNP-149]
_ = x[SVMPF-150]
_ = x[SVMPFT-151]
_ = x[SYSCALL-152]
_ = x[SYSEE-153]
_ = x[TBM-154]
_ = x[TLB_FLUSH_NESTED-155]
_ = x[TME-156]
_ = x[TOPEXT-157]
_ = x[TSCRATEMSR-158]
_ = x[TSXLDTRK-159]
_ = x[VAES-160]
_ = x[VMCBCLEAN-161]
_ = x[VMPL-162]
_ = x[VMSA_REGPROT-163]
_ = x[VMX-164]
_ = x[VPCLMULQDQ-165]
_ = x[VTE-166]
_ = x[WAITPKG-167]
_ = x[WBNOINVD-168]
_ = x[X87-169]
_ = x[XGETBV1-170]
_ = x[XOP-171]
_ = x[XSAVE-172]
_ = x[XSAVEC-173]
_ = x[XSAVEOPT-174]
_ = x[XSAVES-175]
_ = x[AESARM-176]
_ = x[ARMCPUID-177]
_ = x[ASIMD-178]
_ = x[ASIMDDP-179]
_ = x[ASIMDHP-180]
_ = x[ASIMDRDM-181]
_ = x[ATOMICS-182]
_ = x[CRC32-183]
_ = x[DCPOP-184]
_ = x[EVTSTRM-185]
_ = x[FCMA-186]
_ = x[FP-187]
_ = x[FPHP-188]
_ = x[GPA-189]
_ = x[JSCVT-190]
_ = x[LRCPC-191]
_ = x[PMULL-192]
_ = x[SHA1-193]
_ = x[SHA2-194]
_ = x[SHA3-195]
_ = x[SHA512-196]
_ = x[SM3-197]
_ = x[SM4-198]
_ = x[SVE-199]
_ = x[lastID-200]
_ = x[APX_F-9]
_ = x[AVX-10]
_ = x[AVX10-11]
_ = x[AVX10_128-12]
_ = x[AVX10_256-13]
_ = x[AVX10_512-14]
_ = x[AVX2-15]
_ = x[AVX512BF16-16]
_ = x[AVX512BITALG-17]
_ = x[AVX512BW-18]
_ = x[AVX512CD-19]
_ = x[AVX512DQ-20]
_ = x[AVX512ER-21]
_ = x[AVX512F-22]
_ = x[AVX512FP16-23]
_ = x[AVX512IFMA-24]
_ = x[AVX512PF-25]
_ = x[AVX512VBMI-26]
_ = x[AVX512VBMI2-27]
_ = x[AVX512VL-28]
_ = x[AVX512VNNI-29]
_ = x[AVX512VP2INTERSECT-30]
_ = x[AVX512VPOPCNTDQ-31]
_ = x[AVXIFMA-32]
_ = x[AVXNECONVERT-33]
_ = x[AVXSLOW-34]
_ = x[AVXVNNI-35]
_ = x[AVXVNNIINT8-36]
_ = x[BHI_CTRL-37]
_ = x[BMI1-38]
_ = x[BMI2-39]
_ = x[CETIBT-40]
_ = x[CETSS-41]
_ = x[CLDEMOTE-42]
_ = x[CLMUL-43]
_ = x[CLZERO-44]
_ = x[CMOV-45]
_ = x[CMPCCXADD-46]
_ = x[CMPSB_SCADBS_SHORT-47]
_ = x[CMPXCHG8-48]
_ = x[CPBOOST-49]
_ = x[CPPC-50]
_ = x[CX16-51]
_ = x[EFER_LMSLE_UNS-52]
_ = x[ENQCMD-53]
_ = x[ERMS-54]
_ = x[F16C-55]
_ = x[FLUSH_L1D-56]
_ = x[FMA3-57]
_ = x[FMA4-58]
_ = x[FP128-59]
_ = x[FP256-60]
_ = x[FSRM-61]
_ = x[FXSR-62]
_ = x[FXSROPT-63]
_ = x[GFNI-64]
_ = x[HLE-65]
_ = x[HRESET-66]
_ = x[HTT-67]
_ = x[HWA-68]
_ = x[HYBRID_CPU-69]
_ = x[HYPERVISOR-70]
_ = x[IA32_ARCH_CAP-71]
_ = x[IA32_CORE_CAP-72]
_ = x[IBPB-73]
_ = x[IBPB_BRTYPE-74]
_ = x[IBRS-75]
_ = x[IBRS_PREFERRED-76]
_ = x[IBRS_PROVIDES_SMP-77]
_ = x[IBS-78]
_ = x[IBSBRNTRGT-79]
_ = x[IBSFETCHSAM-80]
_ = x[IBSFFV-81]
_ = x[IBSOPCNT-82]
_ = x[IBSOPCNTEXT-83]
_ = x[IBSOPSAM-84]
_ = x[IBSRDWROPCNT-85]
_ = x[IBSRIPINVALIDCHK-86]
_ = x[IBS_FETCH_CTLX-87]
_ = x[IBS_OPDATA4-88]
_ = x[IBS_OPFUSE-89]
_ = x[IBS_PREVENTHOST-90]
_ = x[IBS_ZEN4-91]
_ = x[IDPRED_CTRL-92]
_ = x[INT_WBINVD-93]
_ = x[INVLPGB-94]
_ = x[KEYLOCKER-95]
_ = x[KEYLOCKERW-96]
_ = x[LAHF-97]
_ = x[LAM-98]
_ = x[LBRVIRT-99]
_ = x[LZCNT-100]
_ = x[MCAOVERFLOW-101]
_ = x[MCDT_NO-102]
_ = x[MCOMMIT-103]
_ = x[MD_CLEAR-104]
_ = x[MMX-105]
_ = x[MMXEXT-106]
_ = x[MOVBE-107]
_ = x[MOVDIR64B-108]
_ = x[MOVDIRI-109]
_ = x[MOVSB_ZL-110]
_ = x[MOVU-111]
_ = x[MPX-112]
_ = x[MSRIRC-113]
_ = x[MSRLIST-114]
_ = x[MSR_PAGEFLUSH-115]
_ = x[NRIPS-116]
_ = x[NX-117]
_ = x[OSXSAVE-118]
_ = x[PCONFIG-119]
_ = x[POPCNT-120]
_ = x[PPIN-121]
_ = x[PREFETCHI-122]
_ = x[PSFD-123]
_ = x[RDPRU-124]
_ = x[RDRAND-125]
_ = x[RDSEED-126]
_ = x[RDTSCP-127]
_ = x[RRSBA_CTRL-128]
_ = x[RTM-129]
_ = x[RTM_ALWAYS_ABORT-130]
_ = x[SBPB-131]
_ = x[SERIALIZE-132]
_ = x[SEV-133]
_ = x[SEV_64BIT-134]
_ = x[SEV_ALTERNATIVE-135]
_ = x[SEV_DEBUGSWAP-136]
_ = x[SEV_ES-137]
_ = x[SEV_RESTRICTED-138]
_ = x[SEV_SNP-139]
_ = x[SGX-140]
_ = x[SGXLC-141]
_ = x[SHA-142]
_ = x[SME-143]
_ = x[SME_COHERENT-144]
_ = x[SPEC_CTRL_SSBD-145]
_ = x[SRBDS_CTRL-146]
_ = x[SRSO_MSR_FIX-147]
_ = x[SRSO_NO-148]
_ = x[SRSO_USER_KERNEL_NO-149]
_ = x[SSE-150]
_ = x[SSE2-151]
_ = x[SSE3-152]
_ = x[SSE4-153]
_ = x[SSE42-154]
_ = x[SSE4A-155]
_ = x[SSSE3-156]
_ = x[STIBP-157]
_ = x[STIBP_ALWAYSON-158]
_ = x[STOSB_SHORT-159]
_ = x[SUCCOR-160]
_ = x[SVM-161]
_ = x[SVMDA-162]
_ = x[SVMFBASID-163]
_ = x[SVML-164]
_ = x[SVMNP-165]
_ = x[SVMPF-166]
_ = x[SVMPFT-167]
_ = x[SYSCALL-168]
_ = x[SYSEE-169]
_ = x[TBM-170]
_ = x[TDX_GUEST-171]
_ = x[TLB_FLUSH_NESTED-172]
_ = x[TME-173]
_ = x[TOPEXT-174]
_ = x[TSCRATEMSR-175]
_ = x[TSXLDTRK-176]
_ = x[VAES-177]
_ = x[VMCBCLEAN-178]
_ = x[VMPL-179]
_ = x[VMSA_REGPROT-180]
_ = x[VMX-181]
_ = x[VPCLMULQDQ-182]
_ = x[VTE-183]
_ = x[WAITPKG-184]
_ = x[WBNOINVD-185]
_ = x[WRMSRNS-186]
_ = x[X87-187]
_ = x[XGETBV1-188]
_ = x[XOP-189]
_ = x[XSAVE-190]
_ = x[XSAVEC-191]
_ = x[XSAVEOPT-192]
_ = x[XSAVES-193]
_ = x[AESARM-194]
_ = x[ARMCPUID-195]
_ = x[ASIMD-196]
_ = x[ASIMDDP-197]
_ = x[ASIMDHP-198]
_ = x[ASIMDRDM-199]
_ = x[ATOMICS-200]
_ = x[CRC32-201]
_ = x[DCPOP-202]
_ = x[EVTSTRM-203]
_ = x[FCMA-204]
_ = x[FP-205]
_ = x[FPHP-206]
_ = x[GPA-207]
_ = x[JSCVT-208]
_ = x[LRCPC-209]
_ = x[PMULL-210]
_ = x[SHA1-211]
_ = x[SHA2-212]
_ = x[SHA3-213]
_ = x[SHA512-214]
_ = x[SM3-215]
_ = x[SM4-216]
_ = x[SVE-217]
_ = x[lastID-218]
_ = x[firstID-0]
}
const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4INT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAPX_FAVXAVX10AVX10_128AVX10_256AVX10_512AVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBPB_BRTYPEIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBKEYLOCKERKEYLOCKERWLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSBPBSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSRSO_MSR_FIXSRSO_NOSRSO_USER_KERNEL_NOSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 65, 69, 79, 91, 99, 107, 115, 123, 130, 140, 150, 158, 168, 179, 187, 197, 215, 230, 237, 249, 256, 263, 274, 278, 282, 288, 293, 301, 306, 312, 316, 325, 343, 351, 358, 362, 366, 380, 386, 390, 394, 403, 407, 411, 416, 421, 425, 429, 436, 440, 443, 449, 452, 455, 465, 475, 488, 501, 505, 509, 523, 540, 543, 553, 564, 570, 578, 589, 597, 609, 625, 639, 650, 660, 675, 683, 693, 700, 704, 707, 714, 719, 730, 737, 744, 752, 755, 761, 766, 775, 782, 790, 794, 797, 803, 816, 821, 823, 830, 837, 843, 847, 856, 860, 865, 871, 877, 883, 886, 902, 911, 914, 923, 938, 951, 957, 971, 978, 981, 986, 989, 992, 1004, 1018, 1028, 1031, 1035, 1039, 1043, 1048, 1053, 1058, 1063, 1077, 1088, 1094, 1097, 1102, 1111, 1115, 1120, 1125, 1131, 1138, 1143, 1146, 1162, 1165, 1171, 1181, 1189, 1193, 1202, 1206, 1218, 1221, 1231, 1234, 1241, 1249, 1252, 1259, 1262, 1267, 1273, 1281, 1287, 1293, 1301, 1306, 1313, 1320, 1328, 1335, 1340, 1345, 1352, 1356, 1358, 1362, 1365, 1370, 1375, 1380, 1384, 1388, 1392, 1398, 1401, 1404, 1407, 1413}
var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 67, 70, 75, 84, 93, 102, 106, 116, 128, 136, 144, 152, 160, 167, 177, 187, 195, 205, 216, 224, 234, 252, 267, 274, 286, 293, 300, 311, 319, 323, 327, 333, 338, 346, 351, 357, 361, 370, 388, 396, 403, 407, 411, 425, 431, 435, 439, 448, 452, 456, 461, 466, 470, 474, 481, 485, 488, 494, 497, 500, 510, 520, 533, 546, 550, 561, 565, 579, 596, 599, 609, 620, 626, 634, 645, 653, 665, 681, 695, 706, 716, 731, 739, 750, 760, 767, 776, 786, 790, 793, 800, 805, 816, 823, 830, 838, 841, 847, 852, 861, 868, 876, 880, 883, 889, 896, 909, 914, 916, 923, 930, 936, 940, 949, 953, 958, 964, 970, 976, 986, 989, 1005, 1009, 1018, 1021, 1030, 1045, 1058, 1064, 1078, 1085, 1088, 1093, 1096, 1099, 1111, 1125, 1135, 1147, 1154, 1173, 1176, 1180, 1184, 1188, 1193, 1198, 1203, 1208, 1222, 1233, 1239, 1242, 1247, 1256, 1260, 1265, 1270, 1276, 1283, 1288, 1291, 1300, 1316, 1319, 1325, 1335, 1343, 1347, 1356, 1360, 1372, 1375, 1385, 1388, 1395, 1403, 1410, 1413, 1420, 1423, 1428, 1434, 1442, 1448, 1454, 1462, 1467, 1474, 1481, 1489, 1496, 1501, 1506, 1513, 1517, 1519, 1523, 1526, 1531, 1536, 1541, 1545, 1549, 1553, 1559, 1562, 1565, 1568, 1574}
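For reference, the regenerated name table and offsets above are what back the String method below; a tiny round-trip check of a few of the newly added IDs (illustrative only):

package main

import (
	"fmt"

	"github.com/klauspost/cpuid/v2"
)

func main() {
	// Each newly added FeatureID should print its own name via the generated stringer.
	fmt.Println(cpuid.AVX10.String(), cpuid.KEYLOCKER.String(), cpuid.TDX_GUEST.String())
}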
func (i FeatureID) String() string {
if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {