Update dependencies (#1800)
This commit is contained in:
		
							
								
								
									
										5
									
								
								vendor/github.com/klauspost/compress/huff0/autogen.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								vendor/github.com/klauspost/compress/huff0/autogen.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,5 @@ | ||||
| package huff0 | ||||
|  | ||||
| //go:generate go run generate.go | ||||
| //go:generate asmfmt -w decompress_amd64.s | ||||
| //go:generate asmfmt -w decompress_8b_amd64.s | ||||
							
								
								
									
										126
									
								
								vendor/github.com/klauspost/compress/huff0/bitreader.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										126
									
								
								vendor/github.com/klauspost/compress/huff0/bitreader.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -8,115 +8,10 @@ package huff0 | ||||
| import ( | ||||
| 	"encoding/binary" | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| ) | ||||
|  | ||||
| // bitReader reads a bitstream in reverse. | ||||
| // The last set bit indicates the start of the stream and is used | ||||
| // for aligning the input. | ||||
| type bitReader struct { | ||||
| 	in       []byte | ||||
| 	off      uint // next byte to read is at in[off - 1] | ||||
| 	value    uint64 | ||||
| 	bitsRead uint8 | ||||
| } | ||||
|  | ||||
| // init initializes and resets the bit reader. | ||||
| func (b *bitReader) init(in []byte) error { | ||||
| 	if len(in) < 1 { | ||||
| 		return errors.New("corrupt stream: too short") | ||||
| 	} | ||||
| 	b.in = in | ||||
| 	b.off = uint(len(in)) | ||||
| 	// The highest bit of the last byte indicates where to start | ||||
| 	v := in[len(in)-1] | ||||
| 	if v == 0 { | ||||
| 		return errors.New("corrupt stream, did not find end of stream") | ||||
| 	} | ||||
| 	b.bitsRead = 64 | ||||
| 	b.value = 0 | ||||
| 	if len(in) >= 8 { | ||||
| 		b.fillFastStart() | ||||
| 	} else { | ||||
| 		b.fill() | ||||
| 		b.fill() | ||||
| 	} | ||||
| 	b.bitsRead += 8 - uint8(highBit32(uint32(v))) | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // peekBitsFast requires that at least one bit is requested every time. | ||||
| // There are no checks if the buffer is filled. | ||||
| func (b *bitReader) peekBitsFast(n uint8) uint16 { | ||||
| 	const regMask = 64 - 1 | ||||
| 	v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) | ||||
| 	return v | ||||
| } | ||||
|  | ||||
| // fillFast() will make sure at least 32 bits are available. | ||||
| // There must be at least 4 bytes available. | ||||
| func (b *bitReader) fillFast() { | ||||
| 	if b.bitsRead < 32 { | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	// 2 bounds checks. | ||||
| 	v := b.in[b.off-4 : b.off] | ||||
| 	v = v[:4] | ||||
| 	low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||||
| 	b.value = (b.value << 32) | uint64(low) | ||||
| 	b.bitsRead -= 32 | ||||
| 	b.off -= 4 | ||||
| } | ||||
|  | ||||
| func (b *bitReader) advance(n uint8) { | ||||
| 	b.bitsRead += n | ||||
| } | ||||
|  | ||||
| // fillFastStart() assumes the bitreader is empty and there are at least 8 bytes to read. | ||||
| func (b *bitReader) fillFastStart() { | ||||
| 	// Do single re-slice to avoid bounds checks. | ||||
| 	b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) | ||||
| 	b.bitsRead = 0 | ||||
| 	b.off -= 8 | ||||
| } | ||||
|  | ||||
| // fill() will make sure at least 32 bits are available. | ||||
| func (b *bitReader) fill() { | ||||
| 	if b.bitsRead < 32 { | ||||
| 		return | ||||
| 	} | ||||
| 	if b.off > 4 { | ||||
| 		v := b.in[b.off-4:] | ||||
| 		v = v[:4] | ||||
| 		low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||||
| 		b.value = (b.value << 32) | uint64(low) | ||||
| 		b.bitsRead -= 32 | ||||
| 		b.off -= 4 | ||||
| 		return | ||||
| 	} | ||||
| 	for b.off > 0 { | ||||
| 		b.value = (b.value << 8) | uint64(b.in[b.off-1]) | ||||
| 		b.bitsRead -= 8 | ||||
| 		b.off-- | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // finished returns true if all bits have been read from the bit stream. | ||||
| func (b *bitReader) finished() bool { | ||||
| 	return b.off == 0 && b.bitsRead >= 64 | ||||
| } | ||||
|  | ||||
| // close closes the bitstream and returns an error if out-of-buffer reads occurred. | ||||
| func (b *bitReader) close() error { | ||||
| 	// Release reference. | ||||
| 	b.in = nil | ||||
| 	if b.bitsRead > 64 { | ||||
| 		return io.ErrUnexpectedEOF | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // bitReader reads a bitstream in reverse. | ||||
| // The last set bit indicates the start of the stream and is used | ||||
| // for aligning the input. | ||||
| @@ -213,10 +108,17 @@ func (b *bitReaderBytes) finished() bool { | ||||
| 	return b.off == 0 && b.bitsRead >= 64 | ||||
| } | ||||
|  | ||||
| func (b *bitReaderBytes) remaining() uint { | ||||
| 	return b.off*8 + uint(64-b.bitsRead) | ||||
| } | ||||
|  | ||||
| // close closes the bitstream and returns an error if unread bits remain or out-of-buffer reads occurred. | ||||
| func (b *bitReaderBytes) close() error { | ||||
| 	// Release reference. | ||||
| 	b.in = nil | ||||
| 	if b.remaining() > 0 { | ||||
| 		return fmt.Errorf("corrupt input: %d bits remain on stream", b.remaining()) | ||||
| 	} | ||||
| 	if b.bitsRead > 64 { | ||||
| 		return io.ErrUnexpectedEOF | ||||
| 	} | ||||
| @@ -263,6 +165,11 @@ func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 { | ||||
| 	return uint16(b.value >> ((64 - n) & 63)) | ||||
| } | ||||
|  | ||||
| // peekTopBits(n) is equivalent to peekBitsFast(64 - n) | ||||
| func (b *bitReaderShifted) peekTopBits(n uint8) uint16 { | ||||
| 	return uint16(b.value >> n) | ||||
| } | ||||
|  | ||||
| func (b *bitReaderShifted) advance(n uint8) { | ||||
| 	b.bitsRead += n | ||||
| 	b.value <<= n & 63 | ||||
| @@ -318,10 +225,17 @@ func (b *bitReaderShifted) finished() bool { | ||||
| 	return b.off == 0 && b.bitsRead >= 64 | ||||
| } | ||||
|  | ||||
| func (b *bitReaderShifted) remaining() uint { | ||||
| 	return b.off*8 + uint(64-b.bitsRead) | ||||
| } | ||||
|  | ||||
| // close closes the bitstream and returns an error if unread bits remain or out-of-buffer reads occurred. | ||||
| func (b *bitReaderShifted) close() error { | ||||
| 	// Release reference. | ||||
| 	b.in = nil | ||||
| 	if b.remaining() > 0 { | ||||
| 		return fmt.Errorf("corrupt input: %d bits remain on stream", b.remaining()) | ||||
| 	} | ||||
| 	if b.bitsRead > 64 { | ||||
| 		return io.ErrUnexpectedEOF | ||||
| 	} | ||||
|   | ||||
							
								
								
									
										9
									
								
								vendor/github.com/klauspost/compress/huff0/compress.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										9
									
								
								vendor/github.com/klauspost/compress/huff0/compress.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -2,6 +2,7 @@ package huff0 | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"math" | ||||
| 	"runtime" | ||||
| 	"sync" | ||||
| ) | ||||
| @@ -289,6 +290,10 @@ func (s *Scratch) compress4X(src []byte) ([]byte, error) { | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		if len(s.Out)-idx > math.MaxUint16 { | ||||
| 			// We cannot store the size in the jump table | ||||
| 			return nil, ErrIncompressible | ||||
| 		} | ||||
| 		// Write compressed length as little endian before block. | ||||
| 		if i < 3 { | ||||
| 			// Last length is not written. | ||||
| @@ -332,6 +337,10 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) { | ||||
| 			return nil, errs[i] | ||||
| 		} | ||||
| 		o := s.tmpOut[i] | ||||
| 		if len(o) > math.MaxUint16 { | ||||
| 			// We cannot store the size in the jump table | ||||
| 			return nil, ErrIncompressible | ||||
| 		} | ||||
| 		// Write compressed length as little endian before block. | ||||
| 		if i < 3 { | ||||
| 			// Last length is not written. | ||||
|   | ||||
							
								
								
									
										493
									
								
								vendor/github.com/klauspost/compress/huff0/decompress.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										493
									
								
								vendor/github.com/klauspost/compress/huff0/decompress.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -4,6 +4,7 @@ import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"sync" | ||||
|  | ||||
| 	"github.com/klauspost/compress/fse" | ||||
| ) | ||||
| @@ -216,6 +217,7 @@ func (s *Scratch) Decoder() *Decoder { | ||||
| 	return &Decoder{ | ||||
| 		dt:             s.dt, | ||||
| 		actualTableLog: s.actualTableLog, | ||||
| 		bufs:           &s.decPool, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| @@ -223,6 +225,15 @@ func (s *Scratch) Decoder() *Decoder { | ||||
| type Decoder struct { | ||||
| 	dt             dTable | ||||
| 	actualTableLog uint8 | ||||
| 	bufs           *sync.Pool | ||||
| } | ||||
|  | ||||
| func (d *Decoder) buffer() *[4][256]byte { | ||||
| 	buf, ok := d.bufs.Get().(*[4][256]byte) | ||||
| 	if ok { | ||||
| 		return buf | ||||
| 	} | ||||
| 	return &[4][256]byte{} | ||||
| } | ||||
|  | ||||
| // Decompress1X will decompress a 1X encoded stream. | ||||
| @@ -249,7 +260,8 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { | ||||
| 	dt := d.dt.single[:tlSize] | ||||
|  | ||||
| 	// Use temp table to avoid bound checks/append penalty. | ||||
| 	var buf [256]byte | ||||
| 	bufs := d.buffer() | ||||
| 	buf := &bufs[0] | ||||
| 	var off uint8 | ||||
|  | ||||
| 	for br.off >= 8 { | ||||
| @@ -277,6 +289,7 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { | ||||
| 		if off == 0 { | ||||
| 			if len(dst)+256 > maxDecodedSize { | ||||
| 				br.close() | ||||
| 				d.bufs.Put(bufs) | ||||
| 				return nil, ErrMaxDecodedSizeExceeded | ||||
| 			} | ||||
| 			dst = append(dst, buf[:]...) | ||||
| @@ -284,6 +297,7 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { | ||||
| 	} | ||||
|  | ||||
| 	if len(dst)+int(off) > maxDecodedSize { | ||||
| 		d.bufs.Put(bufs) | ||||
| 		br.close() | ||||
| 		return nil, ErrMaxDecodedSizeExceeded | ||||
| 	} | ||||
| @@ -310,6 +324,7 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { | ||||
| 			} | ||||
| 		} | ||||
| 		if len(dst) >= maxDecodedSize { | ||||
| 			d.bufs.Put(bufs) | ||||
| 			br.close() | ||||
| 			return nil, ErrMaxDecodedSizeExceeded | ||||
| 		} | ||||
| @@ -319,6 +334,7 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { | ||||
| 		bitsLeft -= nBits | ||||
| 		dst = append(dst, uint8(v.entry>>8)) | ||||
| 	} | ||||
| 	d.bufs.Put(bufs) | ||||
| 	return dst, br.close() | ||||
| } | ||||
|  | ||||
| @@ -341,7 +357,8 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { | ||||
| 	dt := d.dt.single[:256] | ||||
|  | ||||
| 	// Use temp table to avoid bound checks/append penalty. | ||||
| 	var buf [256]byte | ||||
| 	bufs := d.buffer() | ||||
| 	buf := &bufs[0] | ||||
| 	var off uint8 | ||||
|  | ||||
| 	switch d.actualTableLog { | ||||
| @@ -369,6 +386,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { | ||||
| 			if off == 0 { | ||||
| 				if len(dst)+256 > maxDecodedSize { | ||||
| 					br.close() | ||||
| 					d.bufs.Put(bufs) | ||||
| 					return nil, ErrMaxDecodedSizeExceeded | ||||
| 				} | ||||
| 				dst = append(dst, buf[:]...) | ||||
| @@ -398,6 +416,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { | ||||
| 			if off == 0 { | ||||
| 				if len(dst)+256 > maxDecodedSize { | ||||
| 					br.close() | ||||
| 					d.bufs.Put(bufs) | ||||
| 					return nil, ErrMaxDecodedSizeExceeded | ||||
| 				} | ||||
| 				dst = append(dst, buf[:]...) | ||||
| @@ -426,6 +445,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { | ||||
| 			off += 4 | ||||
| 			if off == 0 { | ||||
| 				if len(dst)+256 > maxDecodedSize { | ||||
| 					d.bufs.Put(bufs) | ||||
| 					br.close() | ||||
| 					return nil, ErrMaxDecodedSizeExceeded | ||||
| 				} | ||||
| @@ -455,6 +475,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { | ||||
| 			off += 4 | ||||
| 			if off == 0 { | ||||
| 				if len(dst)+256 > maxDecodedSize { | ||||
| 					d.bufs.Put(bufs) | ||||
| 					br.close() | ||||
| 					return nil, ErrMaxDecodedSizeExceeded | ||||
| 				} | ||||
| @@ -484,6 +505,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { | ||||
| 			off += 4 | ||||
| 			if off == 0 { | ||||
| 				if len(dst)+256 > maxDecodedSize { | ||||
| 					d.bufs.Put(bufs) | ||||
| 					br.close() | ||||
| 					return nil, ErrMaxDecodedSizeExceeded | ||||
| 				} | ||||
| @@ -513,6 +535,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { | ||||
| 			off += 4 | ||||
| 			if off == 0 { | ||||
| 				if len(dst)+256 > maxDecodedSize { | ||||
| 					d.bufs.Put(bufs) | ||||
| 					br.close() | ||||
| 					return nil, ErrMaxDecodedSizeExceeded | ||||
| 				} | ||||
| @@ -542,6 +565,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { | ||||
| 			off += 4 | ||||
| 			if off == 0 { | ||||
| 				if len(dst)+256 > maxDecodedSize { | ||||
| 					d.bufs.Put(bufs) | ||||
| 					br.close() | ||||
| 					return nil, ErrMaxDecodedSizeExceeded | ||||
| 				} | ||||
| @@ -571,6 +595,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { | ||||
| 			off += 4 | ||||
| 			if off == 0 { | ||||
| 				if len(dst)+256 > maxDecodedSize { | ||||
| 					d.bufs.Put(bufs) | ||||
| 					br.close() | ||||
| 					return nil, ErrMaxDecodedSizeExceeded | ||||
| 				} | ||||
| @@ -578,10 +603,12 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { | ||||
| 			} | ||||
| 		} | ||||
| 	default: | ||||
| 		d.bufs.Put(bufs) | ||||
| 		return nil, fmt.Errorf("invalid tablelog: %d", d.actualTableLog) | ||||
| 	} | ||||
|  | ||||
| 	if len(dst)+int(off) > maxDecodedSize { | ||||
| 		d.bufs.Put(bufs) | ||||
| 		br.close() | ||||
| 		return nil, ErrMaxDecodedSizeExceeded | ||||
| 	} | ||||
| @@ -601,6 +628,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { | ||||
| 		} | ||||
| 		if len(dst) >= maxDecodedSize { | ||||
| 			br.close() | ||||
| 			d.bufs.Put(bufs) | ||||
| 			return nil, ErrMaxDecodedSizeExceeded | ||||
| 		} | ||||
| 		v := dt[br.peekByteFast()>>shift] | ||||
| @@ -609,6 +637,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { | ||||
| 		bitsLeft -= int8(nBits) | ||||
| 		dst = append(dst, uint8(v.entry>>8)) | ||||
| 	} | ||||
| 	d.bufs.Put(bufs) | ||||
| 	return dst, br.close() | ||||
| } | ||||
|  | ||||
| @@ -628,7 +657,8 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) { | ||||
| 	dt := d.dt.single[:256] | ||||
|  | ||||
| 	// Use temp table to avoid bound checks/append penalty. | ||||
| 	var buf [256]byte | ||||
| 	bufs := d.buffer() | ||||
| 	buf := &bufs[0] | ||||
| 	var off uint8 | ||||
|  | ||||
| 	const shift = 56 | ||||
| @@ -655,6 +685,7 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) { | ||||
| 		off += 4 | ||||
| 		if off == 0 { | ||||
| 			if len(dst)+256 > maxDecodedSize { | ||||
| 				d.bufs.Put(bufs) | ||||
| 				br.close() | ||||
| 				return nil, ErrMaxDecodedSizeExceeded | ||||
| 			} | ||||
| @@ -663,6 +694,7 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) { | ||||
| 	} | ||||
|  | ||||
| 	if len(dst)+int(off) > maxDecodedSize { | ||||
| 		d.bufs.Put(bufs) | ||||
| 		br.close() | ||||
| 		return nil, ErrMaxDecodedSizeExceeded | ||||
| 	} | ||||
| @@ -679,6 +711,7 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) { | ||||
| 			} | ||||
| 		} | ||||
| 		if len(dst) >= maxDecodedSize { | ||||
| 			d.bufs.Put(bufs) | ||||
| 			br.close() | ||||
| 			return nil, ErrMaxDecodedSizeExceeded | ||||
| 		} | ||||
| @@ -688,195 +721,10 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) { | ||||
| 		bitsLeft -= int8(nBits) | ||||
| 		dst = append(dst, uint8(v.entry>>8)) | ||||
| 	} | ||||
| 	d.bufs.Put(bufs) | ||||
| 	return dst, br.close() | ||||
| } | ||||
|  | ||||
| // Decompress4X will decompress a 4X encoded stream. | ||||
| // The length of the supplied input must match the end of a block exactly. | ||||
| // The *capacity* of the dst slice must match the destination size of | ||||
| // the uncompressed data exactly. | ||||
| func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { | ||||
| 	if len(d.dt.single) == 0 { | ||||
| 		return nil, errors.New("no table loaded") | ||||
| 	} | ||||
| 	if len(src) < 6+(4*1) { | ||||
| 		return nil, errors.New("input too small") | ||||
| 	} | ||||
| 	if use8BitTables && d.actualTableLog <= 8 { | ||||
| 		return d.decompress4X8bit(dst, src) | ||||
| 	} | ||||
|  | ||||
| 	var br [4]bitReaderShifted | ||||
| 	start := 6 | ||||
| 	for i := 0; i < 3; i++ { | ||||
| 		length := int(src[i*2]) | (int(src[i*2+1]) << 8) | ||||
| 		if start+length >= len(src) { | ||||
| 			return nil, errors.New("truncated input (or invalid offset)") | ||||
| 		} | ||||
| 		err := br[i].init(src[start : start+length]) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		start += length | ||||
| 	} | ||||
| 	err := br[3].init(src[start:]) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	// destination, offset to match first output | ||||
| 	dstSize := cap(dst) | ||||
| 	dst = dst[:dstSize] | ||||
| 	out := dst | ||||
| 	dstEvery := (dstSize + 3) / 4 | ||||
|  | ||||
| 	const tlSize = 1 << tableLogMax | ||||
| 	const tlMask = tlSize - 1 | ||||
| 	single := d.dt.single[:tlSize] | ||||
|  | ||||
| 	// Use temp table to avoid bound checks/append penalty. | ||||
| 	var buf [256]byte | ||||
| 	var off uint8 | ||||
| 	var decoded int | ||||
|  | ||||
| 	// Decode 2 values from each decoder/loop. | ||||
| 	const bufoff = 256 / 4 | ||||
| 	for { | ||||
| 		if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { | ||||
| 			break | ||||
| 		} | ||||
|  | ||||
| 		{ | ||||
| 			const stream = 0 | ||||
| 			const stream2 = 1 | ||||
| 			br[stream].fillFast() | ||||
| 			br[stream2].fillFast() | ||||
|  | ||||
| 			val := br[stream].peekBitsFast(d.actualTableLog) | ||||
| 			val2 := br[stream2].peekBitsFast(d.actualTableLog) | ||||
| 			v := single[val&tlMask] | ||||
| 			v2 := single[val2&tlMask] | ||||
| 			br[stream].advance(uint8(v.entry)) | ||||
| 			br[stream2].advance(uint8(v2.entry)) | ||||
| 			buf[off+bufoff*stream] = uint8(v.entry >> 8) | ||||
| 			buf[off+bufoff*stream2] = uint8(v2.entry >> 8) | ||||
|  | ||||
| 			val = br[stream].peekBitsFast(d.actualTableLog) | ||||
| 			val2 = br[stream2].peekBitsFast(d.actualTableLog) | ||||
| 			v = single[val&tlMask] | ||||
| 			v2 = single[val2&tlMask] | ||||
| 			br[stream].advance(uint8(v.entry)) | ||||
| 			br[stream2].advance(uint8(v2.entry)) | ||||
| 			buf[off+bufoff*stream+1] = uint8(v.entry >> 8) | ||||
| 			buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8) | ||||
| 		} | ||||
|  | ||||
| 		{ | ||||
| 			const stream = 2 | ||||
| 			const stream2 = 3 | ||||
| 			br[stream].fillFast() | ||||
| 			br[stream2].fillFast() | ||||
|  | ||||
| 			val := br[stream].peekBitsFast(d.actualTableLog) | ||||
| 			val2 := br[stream2].peekBitsFast(d.actualTableLog) | ||||
| 			v := single[val&tlMask] | ||||
| 			v2 := single[val2&tlMask] | ||||
| 			br[stream].advance(uint8(v.entry)) | ||||
| 			br[stream2].advance(uint8(v2.entry)) | ||||
| 			buf[off+bufoff*stream] = uint8(v.entry >> 8) | ||||
| 			buf[off+bufoff*stream2] = uint8(v2.entry >> 8) | ||||
|  | ||||
| 			val = br[stream].peekBitsFast(d.actualTableLog) | ||||
| 			val2 = br[stream2].peekBitsFast(d.actualTableLog) | ||||
| 			v = single[val&tlMask] | ||||
| 			v2 = single[val2&tlMask] | ||||
| 			br[stream].advance(uint8(v.entry)) | ||||
| 			br[stream2].advance(uint8(v2.entry)) | ||||
| 			buf[off+bufoff*stream+1] = uint8(v.entry >> 8) | ||||
| 			buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8) | ||||
| 		} | ||||
|  | ||||
| 		off += 2 | ||||
|  | ||||
| 		if off == bufoff { | ||||
| 			if bufoff > dstEvery { | ||||
| 				return nil, errors.New("corruption detected: stream overrun 1") | ||||
| 			} | ||||
| 			copy(out, buf[:bufoff]) | ||||
| 			copy(out[dstEvery:], buf[bufoff:bufoff*2]) | ||||
| 			copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) | ||||
| 			copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) | ||||
| 			off = 0 | ||||
| 			out = out[bufoff:] | ||||
| 			decoded += 256 | ||||
| 			// There must at least be 3 buffers left. | ||||
| 			if len(out) < dstEvery*3 { | ||||
| 				return nil, errors.New("corruption detected: stream overrun 2") | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	if off > 0 { | ||||
| 		ioff := int(off) | ||||
| 		if len(out) < dstEvery*3+ioff { | ||||
| 			return nil, errors.New("corruption detected: stream overrun 3") | ||||
| 		} | ||||
| 		copy(out, buf[:off]) | ||||
| 		copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) | ||||
| 		copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) | ||||
| 		copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) | ||||
| 		decoded += int(off) * 4 | ||||
| 		out = out[off:] | ||||
| 	} | ||||
|  | ||||
| 	// Decode remaining. | ||||
| 	for i := range br { | ||||
| 		offset := dstEvery * i | ||||
| 		br := &br[i] | ||||
| 		bitsLeft := br.off*8 + uint(64-br.bitsRead) | ||||
| 		for bitsLeft > 0 { | ||||
| 			br.fill() | ||||
| 			if false && br.bitsRead >= 32 { | ||||
| 				if br.off >= 4 { | ||||
| 					v := br.in[br.off-4:] | ||||
| 					v = v[:4] | ||||
| 					low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||||
| 					br.value = (br.value << 32) | uint64(low) | ||||
| 					br.bitsRead -= 32 | ||||
| 					br.off -= 4 | ||||
| 				} else { | ||||
| 					for br.off > 0 { | ||||
| 						br.value = (br.value << 8) | uint64(br.in[br.off-1]) | ||||
| 						br.bitsRead -= 8 | ||||
| 						br.off-- | ||||
| 					} | ||||
| 				} | ||||
| 			} | ||||
| 			// end inline... | ||||
| 			if offset >= len(out) { | ||||
| 				return nil, errors.New("corruption detected: stream overrun 4") | ||||
| 			} | ||||
|  | ||||
| 			// Read value and increment offset. | ||||
| 			val := br.peekBitsFast(d.actualTableLog) | ||||
| 			v := single[val&tlMask].entry | ||||
| 			nBits := uint8(v) | ||||
| 			br.advance(nBits) | ||||
| 			bitsLeft -= uint(nBits) | ||||
| 			out[offset] = uint8(v >> 8) | ||||
| 			offset++ | ||||
| 		} | ||||
| 		decoded += offset - dstEvery*i | ||||
| 		err = br.close() | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 	} | ||||
| 	if dstSize != decoded { | ||||
| 		return nil, errors.New("corruption detected: short output block") | ||||
| 	} | ||||
| 	return dst, nil | ||||
| } | ||||
|  | ||||
| // Decompress4X will decompress a 4X encoded stream. | ||||
| // The length of the supplied input must match the end of a block exactly. | ||||
| // The *capacity* of the dst slice must match the destination size of | ||||
| @@ -916,12 +764,12 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { | ||||
| 	single := d.dt.single[:tlSize] | ||||
|  | ||||
| 	// Use temp table to avoid bound checks/append penalty. | ||||
| 	var buf [256]byte | ||||
| 	buf := d.buffer() | ||||
| 	var off uint8 | ||||
| 	var decoded int | ||||
|  | ||||
| 	// Decode 4 values from each decoder/loop. | ||||
| 	const bufoff = 256 / 4 | ||||
| 	const bufoff = 256 | ||||
| 	for { | ||||
| 		if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { | ||||
| 			break | ||||
| @@ -942,8 +790,8 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { | ||||
| 			br1.value <<= v & 63 | ||||
| 			br2.bitsRead += uint8(v2) | ||||
| 			br2.value <<= v2 & 63 | ||||
| 			buf[off+bufoff*stream] = uint8(v >> 8) | ||||
| 			buf[off+bufoff*stream2] = uint8(v2 >> 8) | ||||
| 			buf[stream][off] = uint8(v >> 8) | ||||
| 			buf[stream2][off] = uint8(v2 >> 8) | ||||
|  | ||||
| 			v = single[uint8(br1.value>>shift)].entry | ||||
| 			v2 = single[uint8(br2.value>>shift)].entry | ||||
| @@ -951,8 +799,8 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { | ||||
| 			br1.value <<= v & 63 | ||||
| 			br2.bitsRead += uint8(v2) | ||||
| 			br2.value <<= v2 & 63 | ||||
| 			buf[off+bufoff*stream+1] = uint8(v >> 8) | ||||
| 			buf[off+bufoff*stream2+1] = uint8(v2 >> 8) | ||||
| 			buf[stream][off+1] = uint8(v >> 8) | ||||
| 			buf[stream2][off+1] = uint8(v2 >> 8) | ||||
|  | ||||
| 			v = single[uint8(br1.value>>shift)].entry | ||||
| 			v2 = single[uint8(br2.value>>shift)].entry | ||||
| @@ -960,8 +808,8 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { | ||||
| 			br1.value <<= v & 63 | ||||
| 			br2.bitsRead += uint8(v2) | ||||
| 			br2.value <<= v2 & 63 | ||||
| 			buf[off+bufoff*stream+2] = uint8(v >> 8) | ||||
| 			buf[off+bufoff*stream2+2] = uint8(v2 >> 8) | ||||
| 			buf[stream][off+2] = uint8(v >> 8) | ||||
| 			buf[stream2][off+2] = uint8(v2 >> 8) | ||||
|  | ||||
| 			v = single[uint8(br1.value>>shift)].entry | ||||
| 			v2 = single[uint8(br2.value>>shift)].entry | ||||
| @@ -969,8 +817,8 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { | ||||
| 			br1.value <<= v & 63 | ||||
| 			br2.bitsRead += uint8(v2) | ||||
| 			br2.value <<= v2 & 63 | ||||
| 			buf[off+bufoff*stream2+3] = uint8(v2 >> 8) | ||||
| 			buf[off+bufoff*stream+3] = uint8(v >> 8) | ||||
| 			buf[stream][off+3] = uint8(v >> 8) | ||||
| 			buf[stream2][off+3] = uint8(v2 >> 8) | ||||
| 		} | ||||
|  | ||||
| 		{ | ||||
| @@ -987,8 +835,8 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { | ||||
| 			br1.value <<= v & 63 | ||||
| 			br2.bitsRead += uint8(v2) | ||||
| 			br2.value <<= v2 & 63 | ||||
| 			buf[off+bufoff*stream] = uint8(v >> 8) | ||||
| 			buf[off+bufoff*stream2] = uint8(v2 >> 8) | ||||
| 			buf[stream][off] = uint8(v >> 8) | ||||
| 			buf[stream2][off] = uint8(v2 >> 8) | ||||
|  | ||||
| 			v = single[uint8(br1.value>>shift)].entry | ||||
| 			v2 = single[uint8(br2.value>>shift)].entry | ||||
| @@ -996,8 +844,8 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { | ||||
| 			br1.value <<= v & 63 | ||||
| 			br2.bitsRead += uint8(v2) | ||||
| 			br2.value <<= v2 & 63 | ||||
| 			buf[off+bufoff*stream+1] = uint8(v >> 8) | ||||
| 			buf[off+bufoff*stream2+1] = uint8(v2 >> 8) | ||||
| 			buf[stream][off+1] = uint8(v >> 8) | ||||
| 			buf[stream2][off+1] = uint8(v2 >> 8) | ||||
|  | ||||
| 			v = single[uint8(br1.value>>shift)].entry | ||||
| 			v2 = single[uint8(br2.value>>shift)].entry | ||||
| @@ -1005,8 +853,8 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { | ||||
| 			br1.value <<= v & 63 | ||||
| 			br2.bitsRead += uint8(v2) | ||||
| 			br2.value <<= v2 & 63 | ||||
| 			buf[off+bufoff*stream+2] = uint8(v >> 8) | ||||
| 			buf[off+bufoff*stream2+2] = uint8(v2 >> 8) | ||||
| 			buf[stream][off+2] = uint8(v >> 8) | ||||
| 			buf[stream2][off+2] = uint8(v2 >> 8) | ||||
|  | ||||
| 			v = single[uint8(br1.value>>shift)].entry | ||||
| 			v2 = single[uint8(br2.value>>shift)].entry | ||||
| @@ -1014,25 +862,26 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { | ||||
| 			br1.value <<= v & 63 | ||||
| 			br2.bitsRead += uint8(v2) | ||||
| 			br2.value <<= v2 & 63 | ||||
| 			buf[off+bufoff*stream2+3] = uint8(v2 >> 8) | ||||
| 			buf[off+bufoff*stream+3] = uint8(v >> 8) | ||||
| 			buf[stream][off+3] = uint8(v >> 8) | ||||
| 			buf[stream2][off+3] = uint8(v2 >> 8) | ||||
| 		} | ||||
|  | ||||
| 		off += 4 | ||||
|  | ||||
| 		if off == bufoff { | ||||
| 		if off == 0 { | ||||
| 			if bufoff > dstEvery { | ||||
| 				d.bufs.Put(buf) | ||||
| 				return nil, errors.New("corruption detected: stream overrun 1") | ||||
| 			} | ||||
| 			copy(out, buf[:bufoff]) | ||||
| 			copy(out[dstEvery:], buf[bufoff:bufoff*2]) | ||||
| 			copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) | ||||
| 			copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) | ||||
| 			off = 0 | ||||
| 			copy(out, buf[0][:]) | ||||
| 			copy(out[dstEvery:], buf[1][:]) | ||||
| 			copy(out[dstEvery*2:], buf[2][:]) | ||||
| 			copy(out[dstEvery*3:], buf[3][:]) | ||||
| 			out = out[bufoff:] | ||||
| 			decoded += 256 | ||||
| 			decoded += bufoff * 4 | ||||
| 			// There must at least be 3 buffers left. | ||||
| 			if len(out) < dstEvery*3 { | ||||
| 				d.bufs.Put(buf) | ||||
| 				return nil, errors.New("corruption detected: stream overrun 2") | ||||
| 			} | ||||
| 		} | ||||
| @@ -1040,23 +889,31 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { | ||||
| 	if off > 0 { | ||||
| 		ioff := int(off) | ||||
| 		if len(out) < dstEvery*3+ioff { | ||||
| 			d.bufs.Put(buf) | ||||
| 			return nil, errors.New("corruption detected: stream overrun 3") | ||||
| 		} | ||||
| 		copy(out, buf[:off]) | ||||
| 		copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) | ||||
| 		copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) | ||||
| 		copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) | ||||
| 		copy(out, buf[0][:off]) | ||||
| 		copy(out[dstEvery:], buf[1][:off]) | ||||
| 		copy(out[dstEvery*2:], buf[2][:off]) | ||||
| 		copy(out[dstEvery*3:], buf[3][:off]) | ||||
| 		decoded += int(off) * 4 | ||||
| 		out = out[off:] | ||||
| 	} | ||||
|  | ||||
| 	// Decode remaining. | ||||
| 	// Decode remaining. | ||||
| 	remainBytes := dstEvery - (decoded / 4) | ||||
| 	for i := range br { | ||||
| 		offset := dstEvery * i | ||||
| 		endsAt := offset + remainBytes | ||||
| 		if endsAt > len(out) { | ||||
| 			endsAt = len(out) | ||||
| 		} | ||||
| 		br := &br[i] | ||||
| 		bitsLeft := int(br.off*8) + int(64-br.bitsRead) | ||||
| 		bitsLeft := br.remaining() | ||||
| 		for bitsLeft > 0 { | ||||
| 			if br.finished() { | ||||
| 				d.bufs.Put(buf) | ||||
| 				return nil, io.ErrUnexpectedEOF | ||||
| 			} | ||||
| 			if br.bitsRead >= 56 { | ||||
| @@ -1076,7 +933,8 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { | ||||
| 				} | ||||
| 			} | ||||
| 			// end inline... | ||||
| 			if offset >= len(out) { | ||||
| 			if offset >= endsAt { | ||||
| 				d.bufs.Put(buf) | ||||
| 				return nil, errors.New("corruption detected: stream overrun 4") | ||||
| 			} | ||||
|  | ||||
| @@ -1084,16 +942,22 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { | ||||
| 			v := single[uint8(br.value>>shift)].entry | ||||
| 			nBits := uint8(v) | ||||
| 			br.advance(nBits) | ||||
| 			bitsLeft -= int(nBits) | ||||
| 			bitsLeft -= uint(nBits) | ||||
| 			out[offset] = uint8(v >> 8) | ||||
| 			offset++ | ||||
| 		} | ||||
| 		if offset != endsAt { | ||||
| 			d.bufs.Put(buf) | ||||
| 			return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) | ||||
| 		} | ||||
| 		decoded += offset - dstEvery*i | ||||
| 		err = br.close() | ||||
| 		if err != nil { | ||||
| 			d.bufs.Put(buf) | ||||
| 			return nil, err | ||||
| 		} | ||||
| 	} | ||||
| 	d.bufs.Put(buf) | ||||
| 	if dstSize != decoded { | ||||
| 		return nil, errors.New("corruption detected: short output block") | ||||
| 	} | ||||
| @@ -1135,12 +999,12 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { | ||||
| 	single := d.dt.single[:tlSize] | ||||
|  | ||||
| 	// Use temp table to avoid bound checks/append penalty. | ||||
| 	var buf [256]byte | ||||
| 	buf := d.buffer() | ||||
| 	var off uint8 | ||||
| 	var decoded int | ||||
|  | ||||
| 	// Decode 4 values from each decoder/loop. | ||||
| 	const bufoff = 256 / 4 | ||||
| 	const bufoff = 256 | ||||
| 	for { | ||||
| 		if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { | ||||
| 			break | ||||
| @@ -1150,104 +1014,109 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { | ||||
| 			// Interleave 2 decodes. | ||||
| 			const stream = 0 | ||||
| 			const stream2 = 1 | ||||
| 			br[stream].fillFast() | ||||
| 			br[stream2].fillFast() | ||||
| 			br1 := &br[stream] | ||||
| 			br2 := &br[stream2] | ||||
| 			br1.fillFast() | ||||
| 			br2.fillFast() | ||||
|  | ||||
| 			v := single[uint8(br[stream].value>>shift)].entry | ||||
| 			v2 := single[uint8(br[stream2].value>>shift)].entry | ||||
| 			br[stream].bitsRead += uint8(v) | ||||
| 			br[stream].value <<= v & 63 | ||||
| 			br[stream2].bitsRead += uint8(v2) | ||||
| 			br[stream2].value <<= v2 & 63 | ||||
| 			buf[off+bufoff*stream] = uint8(v >> 8) | ||||
| 			buf[off+bufoff*stream2] = uint8(v2 >> 8) | ||||
| 			v := single[uint8(br1.value>>shift)].entry | ||||
| 			v2 := single[uint8(br2.value>>shift)].entry | ||||
| 			br1.bitsRead += uint8(v) | ||||
| 			br1.value <<= v & 63 | ||||
| 			br2.bitsRead += uint8(v2) | ||||
| 			br2.value <<= v2 & 63 | ||||
| 			buf[stream][off] = uint8(v >> 8) | ||||
| 			buf[stream2][off] = uint8(v2 >> 8) | ||||
|  | ||||
| 			v = single[uint8(br[stream].value>>shift)].entry | ||||
| 			v2 = single[uint8(br[stream2].value>>shift)].entry | ||||
| 			br[stream].bitsRead += uint8(v) | ||||
| 			br[stream].value <<= v & 63 | ||||
| 			br[stream2].bitsRead += uint8(v2) | ||||
| 			br[stream2].value <<= v2 & 63 | ||||
| 			buf[off+bufoff*stream+1] = uint8(v >> 8) | ||||
| 			buf[off+bufoff*stream2+1] = uint8(v2 >> 8) | ||||
| 			v = single[uint8(br1.value>>shift)].entry | ||||
| 			v2 = single[uint8(br2.value>>shift)].entry | ||||
| 			br1.bitsRead += uint8(v) | ||||
| 			br1.value <<= v & 63 | ||||
| 			br2.bitsRead += uint8(v2) | ||||
| 			br2.value <<= v2 & 63 | ||||
| 			buf[stream][off+1] = uint8(v >> 8) | ||||
| 			buf[stream2][off+1] = uint8(v2 >> 8) | ||||
|  | ||||
| 			v = single[uint8(br[stream].value>>shift)].entry | ||||
| 			v2 = single[uint8(br[stream2].value>>shift)].entry | ||||
| 			br[stream].bitsRead += uint8(v) | ||||
| 			br[stream].value <<= v & 63 | ||||
| 			br[stream2].bitsRead += uint8(v2) | ||||
| 			br[stream2].value <<= v2 & 63 | ||||
| 			buf[off+bufoff*stream+2] = uint8(v >> 8) | ||||
| 			buf[off+bufoff*stream2+2] = uint8(v2 >> 8) | ||||
| 			v = single[uint8(br1.value>>shift)].entry | ||||
| 			v2 = single[uint8(br2.value>>shift)].entry | ||||
| 			br1.bitsRead += uint8(v) | ||||
| 			br1.value <<= v & 63 | ||||
| 			br2.bitsRead += uint8(v2) | ||||
| 			br2.value <<= v2 & 63 | ||||
| 			buf[stream][off+2] = uint8(v >> 8) | ||||
| 			buf[stream2][off+2] = uint8(v2 >> 8) | ||||
|  | ||||
| 			v = single[uint8(br[stream].value>>shift)].entry | ||||
| 			v2 = single[uint8(br[stream2].value>>shift)].entry | ||||
| 			br[stream].bitsRead += uint8(v) | ||||
| 			br[stream].value <<= v & 63 | ||||
| 			br[stream2].bitsRead += uint8(v2) | ||||
| 			br[stream2].value <<= v2 & 63 | ||||
| 			buf[off+bufoff*stream+3] = uint8(v >> 8) | ||||
| 			buf[off+bufoff*stream2+3] = uint8(v2 >> 8) | ||||
| 			v = single[uint8(br1.value>>shift)].entry | ||||
| 			v2 = single[uint8(br2.value>>shift)].entry | ||||
| 			br1.bitsRead += uint8(v) | ||||
| 			br1.value <<= v & 63 | ||||
| 			br2.bitsRead += uint8(v2) | ||||
| 			br2.value <<= v2 & 63 | ||||
| 			buf[stream][off+3] = uint8(v >> 8) | ||||
| 			buf[stream2][off+3] = uint8(v2 >> 8) | ||||
| 		} | ||||
|  | ||||
| 		{ | ||||
| 			const stream = 2 | ||||
| 			const stream2 = 3 | ||||
| 			br[stream].fillFast() | ||||
| 			br[stream2].fillFast() | ||||
| 			br1 := &br[stream] | ||||
| 			br2 := &br[stream2] | ||||
| 			br1.fillFast() | ||||
| 			br2.fillFast() | ||||
|  | ||||
| 			v := single[uint8(br[stream].value>>shift)].entry | ||||
| 			v2 := single[uint8(br[stream2].value>>shift)].entry | ||||
| 			br[stream].bitsRead += uint8(v) | ||||
| 			br[stream].value <<= v & 63 | ||||
| 			br[stream2].bitsRead += uint8(v2) | ||||
| 			br[stream2].value <<= v2 & 63 | ||||
| 			buf[off+bufoff*stream] = uint8(v >> 8) | ||||
| 			buf[off+bufoff*stream2] = uint8(v2 >> 8) | ||||
| 			v := single[uint8(br1.value>>shift)].entry | ||||
| 			v2 := single[uint8(br2.value>>shift)].entry | ||||
| 			br1.bitsRead += uint8(v) | ||||
| 			br1.value <<= v & 63 | ||||
| 			br2.bitsRead += uint8(v2) | ||||
| 			br2.value <<= v2 & 63 | ||||
| 			buf[stream][off] = uint8(v >> 8) | ||||
| 			buf[stream2][off] = uint8(v2 >> 8) | ||||
|  | ||||
| 			v = single[uint8(br[stream].value>>shift)].entry | ||||
| 			v2 = single[uint8(br[stream2].value>>shift)].entry | ||||
| 			br[stream].bitsRead += uint8(v) | ||||
| 			br[stream].value <<= v & 63 | ||||
| 			br[stream2].bitsRead += uint8(v2) | ||||
| 			br[stream2].value <<= v2 & 63 | ||||
| 			buf[off+bufoff*stream+1] = uint8(v >> 8) | ||||
| 			buf[off+bufoff*stream2+1] = uint8(v2 >> 8) | ||||
| 			v = single[uint8(br1.value>>shift)].entry | ||||
| 			v2 = single[uint8(br2.value>>shift)].entry | ||||
| 			br1.bitsRead += uint8(v) | ||||
| 			br1.value <<= v & 63 | ||||
| 			br2.bitsRead += uint8(v2) | ||||
| 			br2.value <<= v2 & 63 | ||||
| 			buf[stream][off+1] = uint8(v >> 8) | ||||
| 			buf[stream2][off+1] = uint8(v2 >> 8) | ||||
|  | ||||
| 			v = single[uint8(br[stream].value>>shift)].entry | ||||
| 			v2 = single[uint8(br[stream2].value>>shift)].entry | ||||
| 			br[stream].bitsRead += uint8(v) | ||||
| 			br[stream].value <<= v & 63 | ||||
| 			br[stream2].bitsRead += uint8(v2) | ||||
| 			br[stream2].value <<= v2 & 63 | ||||
| 			buf[off+bufoff*stream+2] = uint8(v >> 8) | ||||
| 			buf[off+bufoff*stream2+2] = uint8(v2 >> 8) | ||||
| 			v = single[uint8(br1.value>>shift)].entry | ||||
| 			v2 = single[uint8(br2.value>>shift)].entry | ||||
| 			br1.bitsRead += uint8(v) | ||||
| 			br1.value <<= v & 63 | ||||
| 			br2.bitsRead += uint8(v2) | ||||
| 			br2.value <<= v2 & 63 | ||||
| 			buf[stream][off+2] = uint8(v >> 8) | ||||
| 			buf[stream2][off+2] = uint8(v2 >> 8) | ||||
|  | ||||
| 			v = single[uint8(br[stream].value>>shift)].entry | ||||
| 			v2 = single[uint8(br[stream2].value>>shift)].entry | ||||
| 			br[stream].bitsRead += uint8(v) | ||||
| 			br[stream].value <<= v & 63 | ||||
| 			br[stream2].bitsRead += uint8(v2) | ||||
| 			br[stream2].value <<= v2 & 63 | ||||
| 			buf[off+bufoff*stream+3] = uint8(v >> 8) | ||||
| 			buf[off+bufoff*stream2+3] = uint8(v2 >> 8) | ||||
| 			v = single[uint8(br1.value>>shift)].entry | ||||
| 			v2 = single[uint8(br2.value>>shift)].entry | ||||
| 			br1.bitsRead += uint8(v) | ||||
| 			br1.value <<= v & 63 | ||||
| 			br2.bitsRead += uint8(v2) | ||||
| 			br2.value <<= v2 & 63 | ||||
| 			buf[stream][off+3] = uint8(v >> 8) | ||||
| 			buf[stream2][off+3] = uint8(v2 >> 8) | ||||
| 		} | ||||
|  | ||||
| 		off += 4 | ||||
|  | ||||
| 		if off == bufoff { | ||||
| 		if off == 0 { | ||||
| 			if bufoff > dstEvery { | ||||
| 				d.bufs.Put(buf) | ||||
| 				return nil, errors.New("corruption detected: stream overrun 1") | ||||
| 			} | ||||
| 			copy(out, buf[:bufoff]) | ||||
| 			copy(out[dstEvery:], buf[bufoff:bufoff*2]) | ||||
| 			copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) | ||||
| 			copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) | ||||
| 			off = 0 | ||||
| 			copy(out, buf[0][:]) | ||||
| 			copy(out[dstEvery:], buf[1][:]) | ||||
| 			copy(out[dstEvery*2:], buf[2][:]) | ||||
| 			copy(out[dstEvery*3:], buf[3][:]) | ||||
| 			out = out[bufoff:] | ||||
| 			decoded += 256 | ||||
| 			decoded += bufoff * 4 | ||||
| 			// There must at least be 3 buffers left. | ||||
| 			if len(out) < dstEvery*3 { | ||||
| 				d.bufs.Put(buf) | ||||
| 				return nil, errors.New("corruption detected: stream overrun 2") | ||||
| 			} | ||||
| 		} | ||||
| @@ -1257,21 +1126,27 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { | ||||
| 		if len(out) < dstEvery*3+ioff { | ||||
| 			return nil, errors.New("corruption detected: stream overrun 3") | ||||
| 		} | ||||
| 		copy(out, buf[:off]) | ||||
| 		copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) | ||||
| 		copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) | ||||
| 		copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) | ||||
| 		copy(out, buf[0][:off]) | ||||
| 		copy(out[dstEvery:], buf[1][:off]) | ||||
| 		copy(out[dstEvery*2:], buf[2][:off]) | ||||
| 		copy(out[dstEvery*3:], buf[3][:off]) | ||||
| 		decoded += int(off) * 4 | ||||
| 		out = out[off:] | ||||
| 	} | ||||
|  | ||||
| 	// Decode remaining. | ||||
| 	remainBytes := dstEvery - (decoded / 4) | ||||
| 	for i := range br { | ||||
| 		offset := dstEvery * i | ||||
| 		endsAt := offset + remainBytes | ||||
| 		if endsAt > len(out) { | ||||
| 			endsAt = len(out) | ||||
| 		} | ||||
| 		br := &br[i] | ||||
| 		bitsLeft := int(br.off*8) + int(64-br.bitsRead) | ||||
| 		bitsLeft := br.remaining() | ||||
| 		for bitsLeft > 0 { | ||||
| 			if br.finished() { | ||||
| 				d.bufs.Put(buf) | ||||
| 				return nil, io.ErrUnexpectedEOF | ||||
| 			} | ||||
| 			if br.bitsRead >= 56 { | ||||
| @@ -1291,7 +1166,8 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { | ||||
| 				} | ||||
| 			} | ||||
| 			// end inline... | ||||
| 			if offset >= len(out) { | ||||
| 			if offset >= endsAt { | ||||
| 				d.bufs.Put(buf) | ||||
| 				return nil, errors.New("corruption detected: stream overrun 4") | ||||
| 			} | ||||
|  | ||||
| @@ -1299,16 +1175,23 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { | ||||
| 			v := single[br.peekByteFast()].entry | ||||
| 			nBits := uint8(v) | ||||
| 			br.advance(nBits) | ||||
| 			bitsLeft -= int(nBits) | ||||
| 			bitsLeft -= uint(nBits) | ||||
| 			out[offset] = uint8(v >> 8) | ||||
| 			offset++ | ||||
| 		} | ||||
| 		if offset != endsAt { | ||||
| 			d.bufs.Put(buf) | ||||
| 			return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) | ||||
| 		} | ||||
|  | ||||
| 		decoded += offset - dstEvery*i | ||||
| 		err = br.close() | ||||
| 		if err != nil { | ||||
| 			d.bufs.Put(buf) | ||||
| 			return nil, err | ||||
| 		} | ||||
| 	} | ||||
| 	d.bufs.Put(buf) | ||||
| 	if dstSize != decoded { | ||||
| 		return nil, errors.New("corruption detected: short output block") | ||||
| 	} | ||||
|   | ||||
							
								
								
									
										488
									
								
								vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										488
									
								
								vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,488 @@ | ||||
| // +build !appengine | ||||
| // +build gc | ||||
| // +build !noasm | ||||
|  | ||||
| #include "textflag.h" | ||||
| #include "funcdata.h" | ||||
| #include "go_asm.h" | ||||
|  | ||||
| #define bufoff      256 // see decompress.go, we're using [4][256]byte table | ||||
|  | ||||
| // decompress4x_8b_loop_x86 decodes 4 symbols from each of the 4 bit readers per iteration into buf (stream i is written at byte offset i*256 + off), looping until a refill leaves some reader with off < 4 or off reaches bufoff; only the low byte of off is stored as the result, so off == 256 is reported as 0. | ||||
| // func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted, peekBits uint8, buf *byte, tbl *dEntrySingle) uint8 -- NOTE(review): return type presumed from the single MOVB to ret+56(FP); confirm against the Go declaration. | ||||
| TEXT ·decompress4x_8b_loop_x86(SB), NOSPLIT, $8 | ||||
| #define off             R8 | ||||
| #define buffer          DI | ||||
| #define table           SI | ||||
|  | ||||
| #define br_bits_read    R9 | ||||
| #define br_value        R10 | ||||
| #define br_offset       R11 | ||||
| #define peek_bits       R12 | ||||
| #define exhausted       DX | ||||
|  | ||||
| #define br0             R13 | ||||
| #define br1             R14 | ||||
| #define br2             R15 | ||||
| #define br3             BP | ||||
|  | ||||
| 	MOVQ BP, 0(SP) | ||||
|  | ||||
| 	XORQ exhausted, exhausted // exhausted = false (clears all of DX; the flag is accumulated in DH, DL is scratch for SETLT) | ||||
| 	XORQ off, off             // off = 0 (symbols written per stream so far) | ||||
|  | ||||
| 	MOVBQZX peekBits+32(FP), peek_bits | ||||
| 	MOVQ    buf+40(FP), buffer | ||||
| 	MOVQ    tbl+48(FP), table | ||||
|  | ||||
| 	MOVQ pbr0+0(FP), br0 | ||||
| 	MOVQ pbr1+8(FP), br1 | ||||
| 	MOVQ pbr2+16(FP), br2 | ||||
| 	MOVQ pbr3+24(FP), br3 | ||||
|  | ||||
| main_loop: | ||||
|  | ||||
| 	// const stream = 0 | ||||
| 	// br0.fillFast(): load the reader state into registers, refill 32 bits if needed | ||||
| 	MOVBQZX bitReaderShifted_bitsRead(br0), br_bits_read | ||||
| 	MOVQ    bitReaderShifted_value(br0), br_value | ||||
| 	MOVQ    bitReaderShifted_off(br0), br_offset | ||||
|  | ||||
| 	// if b.bitsRead >= 32 { | ||||
| 	CMPQ br_bits_read, $32 | ||||
| 	JB   skip_fill0 | ||||
|  | ||||
| 	SUBQ $32, br_bits_read // b.bitsRead -= 32 | ||||
| 	SUBQ $4, br_offset     // b.off -= 4 | ||||
|  | ||||
| 	// v := b.in[b.off-4 : b.off] (indices relative to b.off before the subtraction above) | ||||
| 	// v = v[:4] | ||||
| 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||||
| 	MOVQ bitReaderShifted_in(br0), AX | ||||
| 	MOVL 0(br_offset)(AX*1), AX       // AX = uint32(b.in[b.off:b.off+4]), using the already decremented b.off | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
| 	MOVQ br_bits_read, CX | ||||
| 	SHLQ CL, AX | ||||
| 	ORQ  AX, br_value | ||||
|  | ||||
| 	// exhausted = exhausted || (br0.off < 4); only evaluated when a refill happened | ||||
| 	CMPQ  br_offset, $4 | ||||
| 	SETLT DL | ||||
| 	ORB   DL, DH | ||||
|  | ||||
| 	// } | ||||
| skip_fill0: | ||||
|  | ||||
| 	// val0 := br0.peekTopBits(peekBits) | ||||
| 	MOVQ br_value, AX | ||||
| 	MOVQ peek_bits, CX | ||||
| 	SHRQ CL, AX        // AX = value >> peek_bits (table index; no mask is applied here) | ||||
|  | ||||
| 	// v0 := table[val0] (2-byte entries) | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v0 (AL: bits to consume, AH: decoded byte) | ||||
|  | ||||
| 	// br0.advance(uint8(v0.entry)) | ||||
| 	MOVB    AH, BL           // BL = uint8(v0.entry >> 8) | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CL, br_value     // value <<= n | ||||
| 	ADDQ    CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// val1 := br0.peekTopBits(peekBits) | ||||
| 	MOVQ peek_bits, CX | ||||
| 	MOVQ br_value, AX | ||||
| 	SHRQ CL, AX        // AX = value >> peek_bits (table index; no mask is applied here) | ||||
|  | ||||
| 	// v1 := table[val1] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v1 | ||||
|  | ||||
| 	// br0.advance(uint8(v1.entry)) | ||||
| 	MOVB    AH, BH           // BH = uint8(v1.entry >> 8) | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CX, br_value     // value <<= n | ||||
| 	ADDQ    CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// these two writes get coalesced into one 16-bit store of BX (BL, BH) | ||||
| 	// buf[stream][off] = uint8(v0.entry >> 8) | ||||
| 	// buf[stream][off+1] = uint8(v1.entry >> 8) | ||||
| 	MOVW BX, 0(buffer)(off*1) | ||||
|  | ||||
| 	// SECOND PART: | ||||
| 	// val2 := br0.peekTopBits(peekBits) | ||||
| 	MOVQ br_value, AX | ||||
| 	MOVQ peek_bits, CX | ||||
| 	SHRQ CL, AX        // AX = value >> peek_bits (table index; no mask is applied here) | ||||
|  | ||||
| 	// v2 := table[val2] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v2 | ||||
|  | ||||
| 	// br0.advance(uint8(v2.entry)) | ||||
| 	MOVB    AH, BL           // BL = uint8(v2.entry >> 8) | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CL, br_value     // value <<= n | ||||
| 	ADDQ    CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// val3 := br0.peekTopBits(peekBits) | ||||
| 	MOVQ peek_bits, CX | ||||
| 	MOVQ br_value, AX | ||||
| 	SHRQ CL, AX        // AX = value >> peek_bits (table index; no mask is applied here) | ||||
|  | ||||
| 	// v3 := table[val3] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v3 | ||||
|  | ||||
| 	// br0.advance(uint8(v3.entry)) | ||||
| 	MOVB    AH, BH           // BH = uint8(v3.entry >> 8) | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CX, br_value     // value <<= n | ||||
| 	ADDQ    CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// these two writes get coalesced into one 16-bit store of BX (BL, BH) | ||||
| 	// buf[stream][off+2] = uint8(v2.entry >> 8) | ||||
| 	// buf[stream][off+3] = uint8(v3.entry >> 8) | ||||
| 	MOVW BX, 0+2(buffer)(off*1) | ||||
|  | ||||
| 	// store the updated bit reader state back to br0 | ||||
| 	MOVB br_bits_read, bitReaderShifted_bitsRead(br0) | ||||
| 	MOVQ br_value, bitReaderShifted_value(br0) | ||||
| 	MOVQ br_offset, bitReaderShifted_off(br0) | ||||
|  | ||||
| 	// const stream = 1 | ||||
| 	// br1.fillFast(): load the reader state into registers, refill 32 bits if needed | ||||
| 	MOVBQZX bitReaderShifted_bitsRead(br1), br_bits_read | ||||
| 	MOVQ    bitReaderShifted_value(br1), br_value | ||||
| 	MOVQ    bitReaderShifted_off(br1), br_offset | ||||
|  | ||||
| 	// if b.bitsRead >= 32 { | ||||
| 	CMPQ br_bits_read, $32 | ||||
| 	JB   skip_fill1 | ||||
|  | ||||
| 	SUBQ $32, br_bits_read // b.bitsRead -= 32 | ||||
| 	SUBQ $4, br_offset     // b.off -= 4 | ||||
|  | ||||
| 	// v := b.in[b.off-4 : b.off] (indices relative to b.off before the subtraction above) | ||||
| 	// v = v[:4] | ||||
| 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||||
| 	MOVQ bitReaderShifted_in(br1), AX | ||||
| 	MOVL 0(br_offset)(AX*1), AX       // AX = uint32(b.in[b.off:b.off+4]), using the already decremented b.off | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
| 	MOVQ br_bits_read, CX | ||||
| 	SHLQ CL, AX | ||||
| 	ORQ  AX, br_value | ||||
|  | ||||
| 	// exhausted = exhausted || (br1.off < 4); only evaluated when a refill happened | ||||
| 	CMPQ  br_offset, $4 | ||||
| 	SETLT DL | ||||
| 	ORB   DL, DH | ||||
|  | ||||
| 	// } | ||||
| skip_fill1: | ||||
|  | ||||
| 	// val0 := br1.peekTopBits(peekBits) | ||||
| 	MOVQ br_value, AX | ||||
| 	MOVQ peek_bits, CX | ||||
| 	SHRQ CL, AX        // AX = value >> peek_bits (table index; no mask is applied here) | ||||
|  | ||||
| 	// v0 := table[val0] (2-byte entries) | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v0 (AL: bits to consume, AH: decoded byte) | ||||
|  | ||||
| 	// br1.advance(uint8(v0.entry)) | ||||
| 	MOVB    AH, BL           // BL = uint8(v0.entry >> 8) | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CL, br_value     // value <<= n | ||||
| 	ADDQ    CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// val1 := br1.peekTopBits(peekBits) | ||||
| 	MOVQ peek_bits, CX | ||||
| 	MOVQ br_value, AX | ||||
| 	SHRQ CL, AX        // AX = value >> peek_bits (table index; no mask is applied here) | ||||
|  | ||||
| 	// v1 := table[val1] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v1 | ||||
|  | ||||
| 	// br1.advance(uint8(v1.entry)) | ||||
| 	MOVB    AH, BH           // BH = uint8(v1.entry >> 8) | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CX, br_value     // value <<= n | ||||
| 	ADDQ    CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// these two writes get coalesced into one 16-bit store of BX (BL, BH) | ||||
| 	// buf[stream][off] = uint8(v0.entry >> 8) | ||||
| 	// buf[stream][off+1] = uint8(v1.entry >> 8) | ||||
| 	MOVW BX, 256(buffer)(off*1) | ||||
|  | ||||
| 	// SECOND PART: | ||||
| 	// val2 := br1.peekTopBits(peekBits) | ||||
| 	MOVQ br_value, AX | ||||
| 	MOVQ peek_bits, CX | ||||
| 	SHRQ CL, AX        // AX = value >> peek_bits (table index; no mask is applied here) | ||||
|  | ||||
| 	// v2 := table[val2] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v2 | ||||
|  | ||||
| 	// br1.advance(uint8(v2.entry)) | ||||
| 	MOVB    AH, BL           // BL = uint8(v2.entry >> 8) | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CL, br_value     // value <<= n | ||||
| 	ADDQ    CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// val3 := br1.peekTopBits(peekBits) | ||||
| 	MOVQ peek_bits, CX | ||||
| 	MOVQ br_value, AX | ||||
| 	SHRQ CL, AX        // AX = value >> peek_bits (table index; no mask is applied here) | ||||
|  | ||||
| 	// v3 := table[val3] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v3 | ||||
|  | ||||
| 	// br1.advance(uint8(v3.entry)) | ||||
| 	MOVB    AH, BH           // BH = uint8(v3.entry >> 8) | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CX, br_value     // value <<= n | ||||
| 	ADDQ    CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// these two writes get coalesced into one 16-bit store of BX (BL, BH) | ||||
| 	// buf[stream][off+2] = uint8(v2.entry >> 8) | ||||
| 	// buf[stream][off+3] = uint8(v3.entry >> 8) | ||||
| 	MOVW BX, 256+2(buffer)(off*1) | ||||
|  | ||||
| 	// store the updated bit reader state back to br1 | ||||
| 	MOVB br_bits_read, bitReaderShifted_bitsRead(br1) | ||||
| 	MOVQ br_value, bitReaderShifted_value(br1) | ||||
| 	MOVQ br_offset, bitReaderShifted_off(br1) | ||||
|  | ||||
| 	// const stream = 2 | ||||
| 	// br2.fillFast(): load the reader state into registers, refill 32 bits if needed | ||||
| 	MOVBQZX bitReaderShifted_bitsRead(br2), br_bits_read | ||||
| 	MOVQ    bitReaderShifted_value(br2), br_value | ||||
| 	MOVQ    bitReaderShifted_off(br2), br_offset | ||||
|  | ||||
| 	// if b.bitsRead >= 32 { | ||||
| 	CMPQ br_bits_read, $32 | ||||
| 	JB   skip_fill2 | ||||
|  | ||||
| 	SUBQ $32, br_bits_read // b.bitsRead -= 32 | ||||
| 	SUBQ $4, br_offset     // b.off -= 4 | ||||
|  | ||||
| 	// v := b.in[b.off-4 : b.off] (indices relative to b.off before the subtraction above) | ||||
| 	// v = v[:4] | ||||
| 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||||
| 	MOVQ bitReaderShifted_in(br2), AX | ||||
| 	MOVL 0(br_offset)(AX*1), AX       // AX = uint32(b.in[b.off:b.off+4]), using the already decremented b.off | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
| 	MOVQ br_bits_read, CX | ||||
| 	SHLQ CL, AX | ||||
| 	ORQ  AX, br_value | ||||
|  | ||||
| 	// exhausted = exhausted || (br2.off < 4); only evaluated when a refill happened | ||||
| 	CMPQ  br_offset, $4 | ||||
| 	SETLT DL | ||||
| 	ORB   DL, DH | ||||
|  | ||||
| 	// } | ||||
| skip_fill2: | ||||
|  | ||||
| 	// val0 := br2.peekTopBits(peekBits) | ||||
| 	MOVQ br_value, AX | ||||
| 	MOVQ peek_bits, CX | ||||
| 	SHRQ CL, AX        // AX = value >> peek_bits (table index; no mask is applied here) | ||||
|  | ||||
| 	// v0 := table[val0] (2-byte entries) | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v0 (AL: bits to consume, AH: decoded byte) | ||||
|  | ||||
| 	// br2.advance(uint8(v0.entry)) | ||||
| 	MOVB    AH, BL           // BL = uint8(v0.entry >> 8) | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CL, br_value     // value <<= n | ||||
| 	ADDQ    CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// val1 := br2.peekTopBits(peekBits) | ||||
| 	MOVQ peek_bits, CX | ||||
| 	MOVQ br_value, AX | ||||
| 	SHRQ CL, AX        // AX = value >> peek_bits (table index; no mask is applied here) | ||||
|  | ||||
| 	// v1 := table[val1] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v1 | ||||
|  | ||||
| 	// br2.advance(uint8(v1.entry)) | ||||
| 	MOVB    AH, BH           // BH = uint8(v1.entry >> 8) | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CX, br_value     // value <<= n | ||||
| 	ADDQ    CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// these two writes get coalesced into one 16-bit store of BX (BL, BH) | ||||
| 	// buf[stream][off] = uint8(v0.entry >> 8) | ||||
| 	// buf[stream][off+1] = uint8(v1.entry >> 8) | ||||
| 	MOVW BX, 512(buffer)(off*1) | ||||
|  | ||||
| 	// SECOND PART: | ||||
| 	// val2 := br2.peekTopBits(peekBits) | ||||
| 	MOVQ br_value, AX | ||||
| 	MOVQ peek_bits, CX | ||||
| 	SHRQ CL, AX        // AX = value >> peek_bits (table index; no mask is applied here) | ||||
|  | ||||
| 	// v2 := table[val2] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v2 | ||||
|  | ||||
| 	// br2.advance(uint8(v2.entry)) | ||||
| 	MOVB    AH, BL           // BL = uint8(v2.entry >> 8) | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CL, br_value     // value <<= n | ||||
| 	ADDQ    CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// val3 := br2.peekTopBits(peekBits) | ||||
| 	MOVQ peek_bits, CX | ||||
| 	MOVQ br_value, AX | ||||
| 	SHRQ CL, AX        // AX = value >> peek_bits (table index; no mask is applied here) | ||||
|  | ||||
| 	// v3 := table[val3] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v3 | ||||
|  | ||||
| 	// br2.advance(uint8(v3.entry)) | ||||
| 	MOVB    AH, BH           // BH = uint8(v3.entry >> 8) | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CX, br_value     // value <<= n | ||||
| 	ADDQ    CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// these two writes get coalesced into one 16-bit store of BX (BL, BH) | ||||
| 	// buf[stream][off+2] = uint8(v2.entry >> 8) | ||||
| 	// buf[stream][off+3] = uint8(v3.entry >> 8) | ||||
| 	MOVW BX, 512+2(buffer)(off*1) | ||||
|  | ||||
| 	// store the updated bit reader state back to br2 | ||||
| 	MOVB br_bits_read, bitReaderShifted_bitsRead(br2) | ||||
| 	MOVQ br_value, bitReaderShifted_value(br2) | ||||
| 	MOVQ br_offset, bitReaderShifted_off(br2) | ||||
|  | ||||
| 	// const stream = 3 | ||||
| 	// br3.fillFast(): load the reader state into registers, refill 32 bits if needed | ||||
| 	MOVBQZX bitReaderShifted_bitsRead(br3), br_bits_read | ||||
| 	MOVQ    bitReaderShifted_value(br3), br_value | ||||
| 	MOVQ    bitReaderShifted_off(br3), br_offset | ||||
|  | ||||
| 	// if b.bitsRead >= 32 { | ||||
| 	CMPQ br_bits_read, $32 | ||||
| 	JB   skip_fill3 | ||||
|  | ||||
| 	SUBQ $32, br_bits_read // b.bitsRead -= 32 | ||||
| 	SUBQ $4, br_offset     // b.off -= 4 | ||||
|  | ||||
| 	// v := b.in[b.off-4 : b.off] (indices relative to b.off before the subtraction above) | ||||
| 	// v = v[:4] | ||||
| 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||||
| 	MOVQ bitReaderShifted_in(br3), AX | ||||
| 	MOVL 0(br_offset)(AX*1), AX       // AX = uint32(b.in[b.off:b.off+4]), using the already decremented b.off | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
| 	MOVQ br_bits_read, CX | ||||
| 	SHLQ CL, AX | ||||
| 	ORQ  AX, br_value | ||||
|  | ||||
| 	// exhausted = exhausted || (br3.off < 4); only evaluated when a refill happened | ||||
| 	CMPQ  br_offset, $4 | ||||
| 	SETLT DL | ||||
| 	ORB   DL, DH | ||||
|  | ||||
| 	// } | ||||
| skip_fill3: | ||||
|  | ||||
| 	// val0 := br3.peekTopBits(peekBits) | ||||
| 	MOVQ br_value, AX | ||||
| 	MOVQ peek_bits, CX | ||||
| 	SHRQ CL, AX        // AX = value >> peek_bits (table index; no mask is applied here) | ||||
|  | ||||
| 	// v0 := table[val0] (2-byte entries) | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v0 (AL: bits to consume, AH: decoded byte) | ||||
|  | ||||
| 	// br3.advance(uint8(v0.entry)) | ||||
| 	MOVB    AH, BL           // BL = uint8(v0.entry >> 8) | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CL, br_value     // value <<= n | ||||
| 	ADDQ    CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// val1 := br3.peekTopBits(peekBits) | ||||
| 	MOVQ peek_bits, CX | ||||
| 	MOVQ br_value, AX | ||||
| 	SHRQ CL, AX        // AX = value >> peek_bits (table index; no mask is applied here) | ||||
|  | ||||
| 	// v1 := table[val1] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v1 | ||||
|  | ||||
| 	// br3.advance(uint8(v1.entry)) | ||||
| 	MOVB    AH, BH           // BH = uint8(v1.entry >> 8) | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CX, br_value     // value <<= n | ||||
| 	ADDQ    CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// these two writes get coalesced into one 16-bit store of BX (BL, BH) | ||||
| 	// buf[stream][off] = uint8(v0.entry >> 8) | ||||
| 	// buf[stream][off+1] = uint8(v1.entry >> 8) | ||||
| 	MOVW BX, 768(buffer)(off*1) | ||||
|  | ||||
| 	// SECOND PART: | ||||
| 	// val2 := br3.peekTopBits(peekBits) | ||||
| 	MOVQ br_value, AX | ||||
| 	MOVQ peek_bits, CX | ||||
| 	SHRQ CL, AX        // AX = value >> peek_bits (table index; no mask is applied here) | ||||
|  | ||||
| 	// v2 := table[val2] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v2 | ||||
|  | ||||
| 	// br3.advance(uint8(v2.entry)) | ||||
| 	MOVB    AH, BL           // BL = uint8(v2.entry >> 8) | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CL, br_value     // value <<= n | ||||
| 	ADDQ    CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// val3 := br3.peekTopBits(peekBits) | ||||
| 	MOVQ peek_bits, CX | ||||
| 	MOVQ br_value, AX | ||||
| 	SHRQ CL, AX        // AX = value >> peek_bits (table index; no mask is applied here) | ||||
|  | ||||
| 	// v3 := table[val3] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v3 | ||||
|  | ||||
| 	// br3.advance(uint8(v3.entry)) | ||||
| 	MOVB    AH, BH           // BH = uint8(v3.entry >> 8) | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CX, br_value     // value <<= n | ||||
| 	ADDQ    CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// these two writes get coalesced into one 16-bit store of BX (BL, BH) | ||||
| 	// buf[stream][off+2] = uint8(v2.entry >> 8) | ||||
| 	// buf[stream][off+3] = uint8(v3.entry >> 8) | ||||
| 	MOVW BX, 768+2(buffer)(off*1) | ||||
|  | ||||
| 	// store the updated bit reader state back to br3 | ||||
| 	MOVB br_bits_read, bitReaderShifted_bitsRead(br3) | ||||
| 	MOVQ br_value, bitReaderShifted_value(br3) | ||||
| 	MOVQ br_offset, bitReaderShifted_off(br3) | ||||
|  | ||||
| 	ADDQ $4, off // off += 4 (four symbols were written per stream) | ||||
|  | ||||
| 	TESTB DH, DH // any br[i].off < 4? | ||||
| 	JNZ   end | ||||
|  | ||||
| 	CMPQ off, $bufoff | ||||
| 	JL   main_loop | ||||
|  | ||||
| end: | ||||
| 	MOVQ 0(SP), BP | ||||
|  | ||||
| 	MOVB off, ret+56(FP) | ||||
| 	RET | ||||
|  | ||||
| #undef off | ||||
| #undef buffer | ||||
| #undef table | ||||
|  | ||||
| #undef br_bits_read | ||||
| #undef br_value | ||||
| #undef br_offset | ||||
| #undef peek_bits | ||||
| #undef exhausted | ||||
|  | ||||
| #undef br0 | ||||
| #undef br1 | ||||
| #undef br2 | ||||
| #undef br3 | ||||
							
								
								
									
										197
									
								
								vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										197
									
								
								vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,197 @@ | ||||
| // +build !appengine | ||||
| // +build gc | ||||
| // +build !noasm | ||||
|  | ||||
| #include "textflag.h" | ||||
| #include "funcdata.h" | ||||
| #include "go_asm.h" | ||||
|  | ||||
|  | ||||
| #define bufoff      256     // see decompress.go, we're using [4][256]byte table | ||||
|  | ||||
| //func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted, | ||||
| //	peekBits uint8, buf *byte, tbl *dEntrySingle) uint8 | ||||
| TEXT ·decompress4x_8b_loop_x86(SB), NOSPLIT, $8 | ||||
| #define off             R8 | ||||
| #define buffer          DI | ||||
| #define table           SI | ||||
|  | ||||
| #define br_bits_read    R9 | ||||
| #define br_value        R10 | ||||
| #define br_offset       R11 | ||||
| #define peek_bits       R12 | ||||
| #define exhausted       DX | ||||
|  | ||||
| #define br0             R13 | ||||
| #define br1             R14 | ||||
| #define br2             R15 | ||||
| #define br3             BP | ||||
|  | ||||
|     MOVQ    BP, 0(SP) | ||||
|  | ||||
|     XORQ    exhausted, exhausted    // exhausted = false | ||||
|     XORQ    off, off                // off = 0 | ||||
|  | ||||
|     MOVBQZX peekBits+32(FP), peek_bits | ||||
|     MOVQ    buf+40(FP), buffer | ||||
|     MOVQ    tbl+48(FP), table | ||||
|  | ||||
|     MOVQ    pbr0+0(FP), br0 | ||||
|     MOVQ    pbr1+8(FP), br1 | ||||
|     MOVQ    pbr2+16(FP), br2 | ||||
|     MOVQ    pbr3+24(FP), br3 | ||||
|  | ||||
| main_loop: | ||||
| {{ define "decode_2_values_x86" }} | ||||
|     // const stream = {{ var "id" }} | ||||
|     // br{{ var "id"}}.fillFast() | ||||
|     MOVBQZX bitReaderShifted_bitsRead(br{{ var "id" }}), br_bits_read | ||||
|     MOVQ    bitReaderShifted_value(br{{ var "id" }}), br_value | ||||
|     MOVQ    bitReaderShifted_off(br{{ var "id" }}), br_offset | ||||
|  | ||||
| 	// if b.bitsRead >= 32 { | ||||
|     CMPQ    br_bits_read, $32 | ||||
|     JB      skip_fill{{ var "id" }} | ||||
|  | ||||
|     SUBQ    $32, br_bits_read       // b.bitsRead -= 32 | ||||
|     SUBQ    $4, br_offset           // b.off -= 4 | ||||
|  | ||||
| 	// v := b.in[b.off-4 : b.off] | ||||
| 	// v = v[:4] | ||||
| 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||||
|     MOVQ    bitReaderShifted_in(br{{ var "id" }}), AX | ||||
|     MOVL    0(br_offset)(AX*1), AX  // AX = uint32(b.in[b.off:b.off+4]) | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
|     MOVQ    br_bits_read, CX | ||||
|     SHLQ    CL, AX | ||||
|     ORQ     AX, br_value | ||||
|  | ||||
|     // exhausted = exhausted || (br{{ var "id"}}.off < 4) | ||||
|     CMPQ    br_offset, $4 | ||||
|     SETLT   DL | ||||
|     ORB     DL, DH | ||||
|     // } | ||||
| skip_fill{{ var "id" }}: | ||||
|  | ||||
|     // val0 := br{{ var "id"}}.peekTopBits(peekBits) | ||||
|     MOVQ    br_value, AX | ||||
|     MOVQ    peek_bits, CX | ||||
|     SHRQ    CL, AX                  // AX = (value >> peek_bits) & mask | ||||
|  | ||||
|     // v0 := table[val0&mask] | ||||
|     MOVW    0(table)(AX*2), AX      // AX - v0 | ||||
|  | ||||
|     // br{{ var "id"}}.advance(uint8(v0.entry)) | ||||
|     MOVB    AH, BL                  // BL = uint8(v0.entry >> 8) | ||||
|     MOVBQZX AL, CX | ||||
|     SHLQ    CL, br_value            // value <<= n | ||||
|     ADDQ    CX, br_bits_read        // bits_read += n | ||||
|  | ||||
|     // val1 := br{{ var "id"}}.peekTopBits(peekBits) | ||||
|     MOVQ    peek_bits, CX | ||||
|     MOVQ    br_value, AX | ||||
|     SHRQ    CL, AX                  // AX = (value >> peek_bits) & mask | ||||
|  | ||||
|     // v1 := table[val1&mask] | ||||
|     MOVW    0(table)(AX*2), AX      // AX - v1 | ||||
|  | ||||
|     // br{{ var "id"}}.advance(uint8(v1.entry)) | ||||
|     MOVB    AH, BH                  // BH = uint8(v1.entry >> 8) | ||||
|     MOVBQZX AL, CX | ||||
|     SHLQ    CX, br_value            // value <<= n | ||||
|     ADDQ    CX, br_bits_read        // bits_read += n | ||||
|  | ||||
|  | ||||
|     // these two writes get coalesced | ||||
|     // buf[stream][off] = uint8(v0.entry >> 8) | ||||
|     // buf[stream][off+1] = uint8(v1.entry >> 8) | ||||
|     MOVW    BX, {{ var "bufofs" }}(buffer)(off*1) | ||||
|  | ||||
|     // SECOND PART: | ||||
|     // val2 := br{{ var "id"}}.peekTopBits(peekBits) | ||||
|     MOVQ    br_value, AX | ||||
|     MOVQ    peek_bits, CX | ||||
|     SHRQ    CL, AX                  // AX = (value >> peek_bits) & mask | ||||
|  | ||||
|     // v2 := table[val0&mask] | ||||
|     MOVW    0(table)(AX*2), AX      // AX - v0 | ||||
|  | ||||
|     // br{{ var "id"}}.advance(uint8(v0.entry)) | ||||
|     MOVB    AH, BL                  // BL = uint8(v0.entry >> 8) | ||||
|     MOVBQZX AL, CX | ||||
|     SHLQ    CL, br_value            // value <<= n | ||||
|     ADDQ    CX, br_bits_read        // bits_read += n | ||||
|  | ||||
|     // val3 := br{{ var "id"}}.peekTopBits(peekBits) | ||||
|     MOVQ    peek_bits, CX | ||||
|     MOVQ    br_value, AX | ||||
|     SHRQ    CL, AX                  // AX = (value >> peek_bits) & mask | ||||
|  | ||||
|     // v3 := table[val1&mask] | ||||
|     MOVW    0(table)(AX*2), AX      // AX - v1 | ||||
|  | ||||
|     // br{{ var "id"}}.advance(uint8(v1.entry)) | ||||
|     MOVB    AH, BH                  // BH = uint8(v1.entry >> 8) | ||||
|     MOVBQZX AL, CX | ||||
|     SHLQ    CX, br_value            // value <<= n | ||||
|     ADDQ    CX, br_bits_read        // bits_read += n | ||||
|  | ||||
|  | ||||
|     // these two writes get coalesced | ||||
|     // buf[stream][off+2] = uint8(v2.entry >> 8) | ||||
|     // buf[stream][off+3] = uint8(v3.entry >> 8) | ||||
|     MOVW    BX, {{ var "bufofs" }}+2(buffer)(off*1) | ||||
|  | ||||
|     // update the bitrader reader structure | ||||
|     MOVB    br_bits_read, bitReaderShifted_bitsRead(br{{ var "id" }}) | ||||
|     MOVQ    br_value, bitReaderShifted_value(br{{ var "id" }}) | ||||
|     MOVQ    br_offset, bitReaderShifted_off(br{{ var "id" }}) | ||||
| {{ end }} | ||||
|  | ||||
|     {{ set "id" "0" }} | ||||
|     {{ set "ofs" "0" }} | ||||
|     {{ set "bufofs" "0" }} {{/* id * bufoff */}} | ||||
|     {{ template "decode_2_values_x86" . }} | ||||
|  | ||||
|     {{ set "id" "1" }} | ||||
|     {{ set "ofs" "8" }} | ||||
|     {{ set "bufofs" "256" }} | ||||
|     {{ template "decode_2_values_x86" . }} | ||||
|  | ||||
|     {{ set "id" "2" }} | ||||
|     {{ set "ofs" "16" }} | ||||
|     {{ set "bufofs" "512" }} | ||||
|     {{ template "decode_2_values_x86" . }} | ||||
|  | ||||
|     {{ set "id" "3" }} | ||||
|     {{ set "ofs" "24" }} | ||||
|     {{ set "bufofs" "768" }} | ||||
|     {{ template "decode_2_values_x86" . }} | ||||
|  | ||||
|     ADDQ    $4, off     // off += 2 | ||||
|  | ||||
|     TESTB   DH, DH      // any br[i].ofs < 4? | ||||
|     JNZ     end | ||||
|  | ||||
|     CMPQ    off, $bufoff | ||||
|     JL      main_loop | ||||
| end: | ||||
|     MOVQ    0(SP), BP | ||||
|  | ||||
|     MOVB    off, ret+56(FP) | ||||
|     RET | ||||
| #undef  off | ||||
| #undef  buffer | ||||
| #undef  table | ||||
|  | ||||
| #undef  br_bits_read | ||||
| #undef  br_value | ||||
| #undef  br_offset | ||||
| #undef  peek_bits | ||||
| #undef  exhausted | ||||
|  | ||||
| #undef  br0 | ||||
| #undef  br1 | ||||
| #undef  br2 | ||||
| #undef  br3 | ||||
							
								
								
									
										181
									
								
								vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										181
									
								
								vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,181 @@ | ||||
| //go:build amd64 && !appengine && !noasm && gc | ||||
| // +build amd64,!appengine,!noasm,gc | ||||
|  | ||||
| // This file contains the specialisation of Decoder.Decompress4X | ||||
| // that uses an asm implementation of its main loop. | ||||
| package huff0 | ||||
|  | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| ) | ||||
|  | ||||
| // decompress4x_main_loop_x86 is an x86 assembler implementation | ||||
| // of the main loop of Decompress4X, used when tablelog > 8. | ||||
| // | ||||
| // pbr0..pbr3 are the four stream readers, peekBits is the right-shift | ||||
| // applied to a reader's value to obtain a table index, buf points at the | ||||
| // first byte of the scratch buffer (one 256-byte row per stream) and tbl | ||||
| // at the first decoding table entry. The result is the low byte of the | ||||
| // number of bytes written per row: 0 means all 256 bytes of every row | ||||
| // were filled, non-zero means the loop stopped early. | ||||
| // | ||||
| // The directive below must have no space after the slashes, otherwise | ||||
| // the compiler treats it as an ordinary comment. | ||||
| // | ||||
| //go:noescape | ||||
| func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted, | ||||
| 	peekBits uint8, buf *byte, tbl *dEntrySingle) uint8 | ||||
|  | ||||
| // decompress4x_8b_loop_x86 is an x86 assembler implementation | ||||
| // of Decompress4X when tablelog <= 8 which decodes 4 entries | ||||
| // per loop. | ||||
| // | ||||
| // It takes the same arguments as decompress4x_main_loop_x86 and | ||||
| // Decompress4X interprets its result the same way (0 means keep going). | ||||
| // | ||||
| // The directive below must have no space after the slashes, otherwise | ||||
| // the compiler treats it as an ordinary comment. | ||||
| // | ||||
| //go:noescape | ||||
| func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted, | ||||
| 	peekBits uint8, buf *byte, tbl *dEntrySingle) uint8 | ||||
|  | ||||
| // fallback8BitSize is the destination capacity below which Decompress4X, when the table log is <= 8, uses the Go version (decompress4X8bit) because it is faster than the assembly loop. | ||||
| const fallback8BitSize = 800 | ||||
|  | ||||
| // Decompress4X will decompress a 4X encoded stream. | ||||
| // The length of the supplied input must match the end of a block exactly. | ||||
| // The *capacity* of the dst slice must match the destination size of | ||||
| // the uncompressed data exactly. | ||||
| // | ||||
| // src begins with a 6-byte jump table holding three little-endian 16-bit | ||||
| // stream lengths; the fourth stream is whatever follows the first three. | ||||
| // Each stream decodes into its own quarter of dst (dstEvery bytes apart). | ||||
| // | ||||
| // On success dst, resliced to its full capacity, is returned. An error is | ||||
| // returned when no table is loaded, the input is too small or truncated, | ||||
| // or the streams do not decode to exactly cap(dst) bytes. | ||||
| func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { | ||||
| 	if len(d.dt.single) == 0 { | ||||
| 		return nil, errors.New("no table loaded") | ||||
| 	} | ||||
| 	// 6 bytes of jump table plus at least one byte for each of the 4 streams. | ||||
| 	if len(src) < 6+(4*1) { | ||||
| 		return nil, errors.New("input too small") | ||||
| 	} | ||||
|  | ||||
| 	use8BitTables := d.actualTableLog <= 8 | ||||
| 	if cap(dst) < fallback8BitSize && use8BitTables { | ||||
| 		return d.decompress4X8bit(dst, src) | ||||
| 	} | ||||
| 	var br [4]bitReaderShifted | ||||
| 	// Decode "jump table" | ||||
| 	start := 6 | ||||
| 	for i := 0; i < 3; i++ { | ||||
| 		length := int(src[i*2]) | (int(src[i*2+1]) << 8) | ||||
| 		if start+length >= len(src) { | ||||
| 			return nil, errors.New("truncated input (or invalid offset)") | ||||
| 		} | ||||
| 		err := br[i].init(src[start : start+length]) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		start += length | ||||
| 	} | ||||
| 	// The last stream has no length entry; it runs to the end of src. | ||||
| 	err := br[3].init(src[start:]) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	// destination, offset to match first output | ||||
| 	dstSize := cap(dst) | ||||
| 	dst = dst[:dstSize] | ||||
| 	out := dst | ||||
| 	dstEvery := (dstSize + 3) / 4 | ||||
|  | ||||
| 	const tlSize = 1 << tableLogMax | ||||
| 	const tlMask = tlSize - 1 | ||||
| 	single := d.dt.single[:tlSize] | ||||
|  | ||||
| 	// Use temp table to avoid bound checks/append penalty. | ||||
| 	// From here on every return path hands buf back via d.bufs.Put. | ||||
| 	buf := d.buffer() | ||||
| 	var off uint8 | ||||
| 	var decoded int | ||||
|  | ||||
| 	const debug = false | ||||
|  | ||||
| 	// see: bitReaderShifted.peekBitsFast() | ||||
| 	peekBits := uint8((64 - d.actualTableLog) & 63) | ||||
|  | ||||
| 	// Decode 2 values from each decoder/loop. | ||||
| 	const bufoff = 256 | ||||
| 	for { | ||||
| 		// The assembly loops refill 4 bytes at a time, so stop using them | ||||
| 		// once any stream has fewer than 4 input bytes left. | ||||
| 		if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { | ||||
| 			break | ||||
| 		} | ||||
|  | ||||
| 		if use8BitTables { | ||||
| 			off = decompress4x_8b_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0]) | ||||
| 		} else { | ||||
| 			off = decompress4x_main_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0]) | ||||
| 		} | ||||
| 		if debug { | ||||
| 			fmt.Print("DEBUG: ") | ||||
| 			fmt.Printf("off=%d,", off) | ||||
| 			for i := 0; i < 4; i++ { | ||||
| 				fmt.Printf(" br[%d]={bitsRead=%d, value=%x, off=%d}", | ||||
| 					i, br[i].bitsRead, br[i].value, br[i].off) | ||||
| 			} | ||||
| 			fmt.Println("") | ||||
| 		} | ||||
|  | ||||
| 		// off is a uint8: 0 means the full bufoff bytes per stream were | ||||
| 		// produced; anything else is a partial buffer handled after the loop. | ||||
| 		if off != 0 { | ||||
| 			break | ||||
| 		} | ||||
|  | ||||
| 		if bufoff > dstEvery { | ||||
| 			d.bufs.Put(buf) | ||||
| 			return nil, errors.New("corruption detected: stream overrun 1") | ||||
| 		} | ||||
| 		copy(out, buf[0][:]) | ||||
| 		copy(out[dstEvery:], buf[1][:]) | ||||
| 		copy(out[dstEvery*2:], buf[2][:]) | ||||
| 		copy(out[dstEvery*3:], buf[3][:]) | ||||
| 		out = out[bufoff:] | ||||
| 		decoded += bufoff * 4 | ||||
| 		// There must at least be 3 buffers left. | ||||
| 		if len(out) < dstEvery*3 { | ||||
| 			d.bufs.Put(buf) | ||||
| 			return nil, errors.New("corruption detected: stream overrun 2") | ||||
| 		} | ||||
| 	} | ||||
| 	// Flush a partially filled buffer left by the last assembly call. | ||||
| 	if off > 0 { | ||||
| 		ioff := int(off) | ||||
| 		if len(out) < dstEvery*3+ioff { | ||||
| 			d.bufs.Put(buf) | ||||
| 			return nil, errors.New("corruption detected: stream overrun 3") | ||||
| 		} | ||||
| 		copy(out, buf[0][:off]) | ||||
| 		copy(out[dstEvery:], buf[1][:off]) | ||||
| 		copy(out[dstEvery*2:], buf[2][:off]) | ||||
| 		copy(out[dstEvery*3:], buf[3][:off]) | ||||
| 		decoded += int(off) * 4 | ||||
| 		out = out[off:] | ||||
| 	} | ||||
|  | ||||
| 	// Decode remaining. | ||||
| 	remainBytes := dstEvery - (decoded / 4) | ||||
| 	for i := range br { | ||||
| 		offset := dstEvery * i | ||||
| 		endsAt := offset + remainBytes | ||||
| 		// The last stream may be shorter than dstEvery; clamp to the output. | ||||
| 		if endsAt > len(out) { | ||||
| 			endsAt = len(out) | ||||
| 		} | ||||
| 		br := &br[i] | ||||
| 		bitsLeft := br.remaining() | ||||
| 		for bitsLeft > 0 { | ||||
| 			br.fill() | ||||
| 			if offset >= endsAt { | ||||
| 				d.bufs.Put(buf) | ||||
| 				return nil, errors.New("corruption detected: stream overrun 4") | ||||
| 			} | ||||
|  | ||||
| 			// Read value and increment offset. | ||||
| 			val := br.peekBitsFast(d.actualTableLog) | ||||
| 			v := single[val&tlMask].entry | ||||
| 			nBits := uint8(v) | ||||
| 			br.advance(nBits) | ||||
| 			bitsLeft -= uint(nBits) | ||||
| 			out[offset] = uint8(v >> 8) | ||||
| 			offset++ | ||||
| 		} | ||||
| 		if offset != endsAt { | ||||
| 			d.bufs.Put(buf) | ||||
| 			return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) | ||||
| 		} | ||||
| 		decoded += offset - dstEvery*i | ||||
| 		err = br.close() | ||||
| 		if err != nil { | ||||
| 			// Hand the scratch buffer back like every other failure path. | ||||
| 			d.bufs.Put(buf) | ||||
| 			return nil, err | ||||
| 		} | ||||
| 	} | ||||
| 	d.bufs.Put(buf) | ||||
| 	if dstSize != decoded { | ||||
| 		return nil, errors.New("corruption detected: short output block") | ||||
| 	} | ||||
| 	return dst, nil | ||||
| } | ||||
							
								
								
									
										506
									
								
								vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										506
									
								
								vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,506 @@ | ||||
| // +build !appengine | ||||
| // +build gc | ||||
| // +build !noasm | ||||
|  | ||||
| #include "textflag.h" | ||||
| #include "funcdata.h" | ||||
| #include "go_asm.h" | ||||
|  | ||||
| #ifdef GOAMD64_v4 | ||||
| #ifndef GOAMD64_v3 | ||||
| #define GOAMD64_v3 | ||||
| #endif | ||||
| #endif | ||||
|  | ||||
| #define bufoff      256 // see decompress.go, we're using [4][256]byte table | ||||
|  | ||||
| // func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted, | ||||
| //	peekBits uint8, buf *byte, tbl *dEntrySingle) uint8 | ||||
| TEXT ·decompress4x_main_loop_x86(SB), NOSPLIT, $8 | ||||
| #define off             R8 | ||||
| #define buffer          DI | ||||
| #define table           SI | ||||
|  | ||||
| #define br_bits_read    R9 | ||||
| #define br_value        R10 | ||||
| #define br_offset       R11 | ||||
| #define peek_bits       R12 | ||||
| #define exhausted       DX | ||||
|  | ||||
| #define br0             R13 | ||||
| #define br1             R14 | ||||
| #define br2             R15 | ||||
| #define br3             BP | ||||
|  | ||||
| 	MOVQ BP, 0(SP) | ||||
|  | ||||
| 	XORQ exhausted, exhausted // exhausted = false | ||||
| 	XORQ off, off             // off = 0 | ||||
|  | ||||
| 	MOVBQZX peekBits+32(FP), peek_bits | ||||
| 	MOVQ    buf+40(FP), buffer | ||||
| 	MOVQ    tbl+48(FP), table | ||||
|  | ||||
| 	MOVQ pbr0+0(FP), br0 | ||||
| 	MOVQ pbr1+8(FP), br1 | ||||
| 	MOVQ pbr2+16(FP), br2 | ||||
| 	MOVQ pbr3+24(FP), br3 | ||||
|  | ||||
| main_loop: | ||||
|  | ||||
| 	// const stream = 0 | ||||
| 	// br0.fillFast() | ||||
| 	MOVBQZX bitReaderShifted_bitsRead(br0), br_bits_read | ||||
| 	MOVQ    bitReaderShifted_value(br0), br_value | ||||
| 	MOVQ    bitReaderShifted_off(br0), br_offset | ||||
|  | ||||
| 	// Refill only when fewer than 22 bits (2 * max tablelog) remain unread | ||||
| 	CMPQ br_bits_read, $64-22 | ||||
| 	JBE  skip_fill0 | ||||
|  | ||||
| 	SUBQ $32, br_bits_read // b.bitsRead -= 32 | ||||
| 	SUBQ $4, br_offset     // b.off -= 4 | ||||
|  | ||||
| 	// v := b.in[b.off-4 : b.off] | ||||
| 	// v = v[:4] | ||||
| 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||||
| 	MOVQ bitReaderShifted_in(br0), AX | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
| #ifdef GOAMD64_v3 | ||||
| 	SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63) | ||||
|  | ||||
| #else | ||||
| 	MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) | ||||
| 	MOVQ br_bits_read, CX | ||||
| 	SHLQ CL, AX | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	ORQ AX, br_value | ||||
|  | ||||
| 	// exhausted = exhausted || (br0.off < 4) | ||||
| 	CMPQ  br_offset, $4 | ||||
| 	SETLT DL | ||||
| 	ORB   DL, DH | ||||
|  | ||||
| 	// } | ||||
| skip_fill0: | ||||
|  | ||||
| 	// val0 := br0.peekTopBits(peekBits) | ||||
| #ifdef GOAMD64_v3 | ||||
| 	SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask | ||||
|  | ||||
| #else | ||||
| 	MOVQ br_value, AX | ||||
| 	MOVQ peek_bits, CX | ||||
| 	SHRQ CL, AX        // AX = (value >> peek_bits) & mask | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	// v0 := table[val0&mask] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v0 | ||||
|  | ||||
| 	// br0.advance(uint8(v0.entry)) | ||||
| 	MOVB AH, BL // BL = uint8(v0.entry >> 8) | ||||
|  | ||||
| #ifdef GOAMD64_v3 | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLXQ   AX, br_value, br_value // value <<= n | ||||
|  | ||||
| #else | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CL, br_value // value <<= n | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	ADDQ CX, br_bits_read // bits_read += n | ||||
|  | ||||
| #ifdef GOAMD64_v3 | ||||
| 	SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask | ||||
|  | ||||
| #else | ||||
| 	// val1 := br0.peekTopBits(peekBits) | ||||
| 	MOVQ peek_bits, CX | ||||
| 	MOVQ br_value, AX | ||||
| 	SHRQ CL, AX        // AX = (value >> peek_bits) & mask | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	// v1 := table[val1&mask] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v1 | ||||
|  | ||||
| 	// br0.advance(uint8(v1.entry)) | ||||
| 	MOVB AH, BH // BH = uint8(v1.entry >> 8) | ||||
|  | ||||
| #ifdef GOAMD64_v3 | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLXQ   AX, br_value, br_value // value <<= n | ||||
|  | ||||
| #else | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CL, br_value // value <<= n | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	ADDQ CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// these two writes get coalesced | ||||
| 	// buf[stream][off] = uint8(v0.entry >> 8) | ||||
| 	// buf[stream][off+1] = uint8(v1.entry >> 8) | ||||
| 	MOVW BX, 0(buffer)(off*1) | ||||
|  | ||||
| 	// write the updated state back to the bit reader structure | ||||
| 	MOVB br_bits_read, bitReaderShifted_bitsRead(br0) | ||||
| 	MOVQ br_value, bitReaderShifted_value(br0) | ||||
| 	MOVQ br_offset, bitReaderShifted_off(br0) | ||||
|  | ||||
| 	// const stream = 1 | ||||
| 	// br1.fillFast() | ||||
| 	MOVBQZX bitReaderShifted_bitsRead(br1), br_bits_read | ||||
| 	MOVQ    bitReaderShifted_value(br1), br_value | ||||
| 	MOVQ    bitReaderShifted_off(br1), br_offset | ||||
|  | ||||
| 	// Refill only when fewer than 22 bits (2 * max tablelog) remain unread | ||||
| 	CMPQ br_bits_read, $64-22 | ||||
| 	JBE  skip_fill1 | ||||
|  | ||||
| 	SUBQ $32, br_bits_read // b.bitsRead -= 32 | ||||
| 	SUBQ $4, br_offset     // b.off -= 4 | ||||
|  | ||||
| 	// v := b.in[b.off-4 : b.off] | ||||
| 	// v = v[:4] | ||||
| 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||||
| 	MOVQ bitReaderShifted_in(br1), AX | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
| #ifdef GOAMD64_v3 | ||||
| 	SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63) | ||||
|  | ||||
| #else | ||||
| 	MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) | ||||
| 	MOVQ br_bits_read, CX | ||||
| 	SHLQ CL, AX | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	ORQ AX, br_value | ||||
|  | ||||
| 	// exhausted = exhausted || (br1.off < 4) | ||||
| 	CMPQ  br_offset, $4 | ||||
| 	SETLT DL | ||||
| 	ORB   DL, DH | ||||
|  | ||||
| 	// } | ||||
| skip_fill1: | ||||
|  | ||||
| 	// val0 := br1.peekTopBits(peekBits) | ||||
| #ifdef GOAMD64_v3 | ||||
| 	SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask | ||||
|  | ||||
| #else | ||||
| 	MOVQ br_value, AX | ||||
| 	MOVQ peek_bits, CX | ||||
| 	SHRQ CL, AX        // AX = (value >> peek_bits) & mask | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	// v0 := table[val0&mask] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v0 | ||||
|  | ||||
| 	// br1.advance(uint8(v0.entry)) | ||||
| 	MOVB AH, BL // BL = uint8(v0.entry >> 8) | ||||
|  | ||||
| #ifdef GOAMD64_v3 | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLXQ   AX, br_value, br_value // value <<= n | ||||
|  | ||||
| #else | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CL, br_value // value <<= n | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	ADDQ CX, br_bits_read // bits_read += n | ||||
|  | ||||
| #ifdef GOAMD64_v3 | ||||
| 	SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask | ||||
|  | ||||
| #else | ||||
| 	// val1 := br1.peekTopBits(peekBits) | ||||
| 	MOVQ peek_bits, CX | ||||
| 	MOVQ br_value, AX | ||||
| 	SHRQ CL, AX        // AX = (value >> peek_bits) & mask | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	// v1 := table[val1&mask] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v1 | ||||
|  | ||||
| 	// br1.advance(uint8(v1.entry)) | ||||
| 	MOVB AH, BH // BH = uint8(v1.entry >> 8) | ||||
|  | ||||
| #ifdef GOAMD64_v3 | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLXQ   AX, br_value, br_value // value <<= n | ||||
|  | ||||
| #else | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CL, br_value // value <<= n | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	ADDQ CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// these two writes get coalesced | ||||
| 	// buf[stream][off] = uint8(v0.entry >> 8) | ||||
| 	// buf[stream][off+1] = uint8(v1.entry >> 8) | ||||
| 	MOVW BX, 256(buffer)(off*1) | ||||
|  | ||||
| 	// write the updated state back to the bit reader structure | ||||
| 	MOVB br_bits_read, bitReaderShifted_bitsRead(br1) | ||||
| 	MOVQ br_value, bitReaderShifted_value(br1) | ||||
| 	MOVQ br_offset, bitReaderShifted_off(br1) | ||||
|  | ||||
| 	// const stream = 2 | ||||
| 	// br2.fillFast() | ||||
| 	MOVBQZX bitReaderShifted_bitsRead(br2), br_bits_read | ||||
| 	MOVQ    bitReaderShifted_value(br2), br_value | ||||
| 	MOVQ    bitReaderShifted_off(br2), br_offset | ||||
|  | ||||
| 	// Refill only when fewer than 22 bits (2 * max tablelog) remain unread | ||||
| 	CMPQ br_bits_read, $64-22 | ||||
| 	JBE  skip_fill2 | ||||
|  | ||||
| 	SUBQ $32, br_bits_read // b.bitsRead -= 32 | ||||
| 	SUBQ $4, br_offset     // b.off -= 4 | ||||
|  | ||||
| 	// v := b.in[b.off-4 : b.off] | ||||
| 	// v = v[:4] | ||||
| 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||||
| 	MOVQ bitReaderShifted_in(br2), AX | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
| #ifdef GOAMD64_v3 | ||||
| 	SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63) | ||||
|  | ||||
| #else | ||||
| 	MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) | ||||
| 	MOVQ br_bits_read, CX | ||||
| 	SHLQ CL, AX | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	ORQ AX, br_value | ||||
|  | ||||
| 	// exhausted = exhausted || (br2.off < 4) | ||||
| 	CMPQ  br_offset, $4 | ||||
| 	SETLT DL | ||||
| 	ORB   DL, DH | ||||
|  | ||||
| 	// } | ||||
| skip_fill2: | ||||
|  | ||||
| 	// val0 := br2.peekTopBits(peekBits) | ||||
| #ifdef GOAMD64_v3 | ||||
| 	SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask | ||||
|  | ||||
| #else | ||||
| 	MOVQ br_value, AX | ||||
| 	MOVQ peek_bits, CX | ||||
| 	SHRQ CL, AX        // AX = (value >> peek_bits) & mask | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	// v0 := table[val0&mask] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v0 | ||||
|  | ||||
| 	// br2.advance(uint8(v0.entry)) | ||||
| 	MOVB AH, BL // BL = uint8(v0.entry >> 8) | ||||
|  | ||||
| #ifdef GOAMD64_v3 | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLXQ   AX, br_value, br_value // value <<= n | ||||
|  | ||||
| #else | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CL, br_value // value <<= n | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	ADDQ CX, br_bits_read // bits_read += n | ||||
|  | ||||
| #ifdef GOAMD64_v3 | ||||
| 	SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask | ||||
|  | ||||
| #else | ||||
| 	// val1 := br2.peekTopBits(peekBits) | ||||
| 	MOVQ peek_bits, CX | ||||
| 	MOVQ br_value, AX | ||||
| 	SHRQ CL, AX        // AX = (value >> peek_bits) & mask | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	// v1 := table[val1&mask] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v1 | ||||
|  | ||||
| 	// br2.advance(uint8(v1.entry)) | ||||
| 	MOVB AH, BH // BH = uint8(v1.entry >> 8) | ||||
|  | ||||
| #ifdef GOAMD64_v3 | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLXQ   AX, br_value, br_value // value <<= n | ||||
|  | ||||
| #else | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CL, br_value // value <<= n | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	ADDQ CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// these two writes get coalesced | ||||
| 	// buf[stream][off] = uint8(v0.entry >> 8) | ||||
| 	// buf[stream][off+1] = uint8(v1.entry >> 8) | ||||
| 	MOVW BX, 512(buffer)(off*1) | ||||
|  | ||||
| 	// write the updated state back to the bit reader structure | ||||
| 	MOVB br_bits_read, bitReaderShifted_bitsRead(br2) | ||||
| 	MOVQ br_value, bitReaderShifted_value(br2) | ||||
| 	MOVQ br_offset, bitReaderShifted_off(br2) | ||||
|  | ||||
| 	// const stream = 3 | ||||
| 	// br3.fillFast() | ||||
| 	MOVBQZX bitReaderShifted_bitsRead(br3), br_bits_read | ||||
| 	MOVQ    bitReaderShifted_value(br3), br_value | ||||
| 	MOVQ    bitReaderShifted_off(br3), br_offset | ||||
|  | ||||
| 	// Refill only when fewer than 22 bits (2 * max tablelog) remain unread | ||||
| 	CMPQ br_bits_read, $64-22 | ||||
| 	JBE  skip_fill3 | ||||
|  | ||||
| 	SUBQ $32, br_bits_read // b.bitsRead -= 32 | ||||
| 	SUBQ $4, br_offset     // b.off -= 4 | ||||
|  | ||||
| 	// v := b.in[b.off-4 : b.off] | ||||
| 	// v = v[:4] | ||||
| 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||||
| 	MOVQ bitReaderShifted_in(br3), AX | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
| #ifdef GOAMD64_v3 | ||||
| 	SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63) | ||||
|  | ||||
| #else | ||||
| 	MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) | ||||
| 	MOVQ br_bits_read, CX | ||||
| 	SHLQ CL, AX | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	ORQ AX, br_value | ||||
|  | ||||
| 	// exhausted = exhausted || (br3.off < 4) | ||||
| 	CMPQ  br_offset, $4 | ||||
| 	SETLT DL | ||||
| 	ORB   DL, DH | ||||
|  | ||||
| 	// } | ||||
| skip_fill3: | ||||
|  | ||||
| 	// val0 := br3.peekTopBits(peekBits) | ||||
| #ifdef GOAMD64_v3 | ||||
| 	SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask | ||||
|  | ||||
| #else | ||||
| 	MOVQ br_value, AX | ||||
| 	MOVQ peek_bits, CX | ||||
| 	SHRQ CL, AX        // AX = (value >> peek_bits) & mask | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	// v0 := table[val0&mask] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v0 | ||||
|  | ||||
| 	// br3.advance(uint8(v0.entry)) | ||||
| 	MOVB AH, BL // BL = uint8(v0.entry >> 8) | ||||
|  | ||||
| #ifdef GOAMD64_v3 | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLXQ   AX, br_value, br_value // value <<= n | ||||
|  | ||||
| #else | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CL, br_value // value <<= n | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	ADDQ CX, br_bits_read // bits_read += n | ||||
|  | ||||
| #ifdef GOAMD64_v3 | ||||
| 	SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask | ||||
|  | ||||
| #else | ||||
| 	// val1 := br3.peekTopBits(peekBits) | ||||
| 	MOVQ peek_bits, CX | ||||
| 	MOVQ br_value, AX | ||||
| 	SHRQ CL, AX        // AX = (value >> peek_bits) & mask | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	// v1 := table[val1&mask] | ||||
| 	MOVW 0(table)(AX*2), AX // AX = v1 | ||||
|  | ||||
| 	// br3.advance(uint8(v1.entry)) | ||||
| 	MOVB AH, BH // BH = uint8(v1.entry >> 8) | ||||
|  | ||||
| #ifdef GOAMD64_v3 | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLXQ   AX, br_value, br_value // value <<= n | ||||
|  | ||||
| #else | ||||
| 	MOVBQZX AL, CX | ||||
| 	SHLQ    CL, br_value // value <<= n | ||||
|  | ||||
| #endif | ||||
|  | ||||
| 	ADDQ CX, br_bits_read // bits_read += n | ||||
|  | ||||
| 	// these two writes get coalesced | ||||
| 	// buf[stream][off] = uint8(v0.entry >> 8) | ||||
| 	// buf[stream][off+1] = uint8(v1.entry >> 8) | ||||
| 	MOVW BX, 768(buffer)(off*1) | ||||
|  | ||||
| 	// write the updated state back to the bit reader structure | ||||
| 	MOVB br_bits_read, bitReaderShifted_bitsRead(br3) | ||||
| 	MOVQ br_value, bitReaderShifted_value(br3) | ||||
| 	MOVQ br_offset, bitReaderShifted_off(br3) | ||||
|  | ||||
| 	ADDQ $2, off // off += 2 | ||||
|  | ||||
| 	TESTB DH, DH // any br[i].off < 4 after a refill? | ||||
| 	JNZ   end | ||||
|  | ||||
| 	CMPQ off, $bufoff | ||||
| 	JL   main_loop | ||||
|  | ||||
| end: | ||||
| 	MOVQ 0(SP), BP | ||||
|  | ||||
| 	MOVB off, ret+56(FP) | ||||
| 	RET | ||||
|  | ||||
| #undef off | ||||
| #undef buffer | ||||
| #undef table | ||||
|  | ||||
| #undef br_bits_read | ||||
| #undef br_value | ||||
| #undef br_offset | ||||
| #undef peek_bits | ||||
| #undef exhausted | ||||
|  | ||||
| #undef br0 | ||||
| #undef br1 | ||||
| #undef br2 | ||||
| #undef br3 | ||||
							
								
								
									
										195
									
								
								vendor/github.com/klauspost/compress/huff0/decompress_amd64.s.in
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										195
									
								
								vendor/github.com/klauspost/compress/huff0/decompress_amd64.s.in
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,195 @@ | ||||
| // +build !appengine | ||||
| // +build gc | ||||
| // +build !noasm | ||||
|  | ||||
| #include "textflag.h" | ||||
| #include "funcdata.h" | ||||
| #include "go_asm.h" | ||||
|  | ||||
| #ifdef GOAMD64_v4 | ||||
| #ifndef GOAMD64_v3 | ||||
| #define GOAMD64_v3 | ||||
| #endif | ||||
| #endif | ||||
|  | ||||
| #define bufoff      256     // see decompress.go, we're using [4][256]byte table | ||||
|  | ||||
| //func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted, | ||||
| //	peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool) | ||||
| TEXT ·decompress4x_main_loop_x86(SB), NOSPLIT, $8 | ||||
| #define off             R8 | ||||
| #define buffer          DI | ||||
| #define table           SI | ||||
|  | ||||
| #define br_bits_read    R9 | ||||
| #define br_value        R10 | ||||
| #define br_offset       R11 | ||||
| #define peek_bits       R12 | ||||
| #define exhausted       DX | ||||
|  | ||||
| #define br0             R13 | ||||
| #define br1             R14 | ||||
| #define br2             R15 | ||||
| #define br3             BP | ||||
|  | ||||
|     MOVQ    BP, 0(SP) | ||||
|  | ||||
|     XORQ    exhausted, exhausted    // exhausted = false | ||||
|     XORQ    off, off                // off = 0 | ||||
|  | ||||
|     MOVBQZX peekBits+32(FP), peek_bits | ||||
|     MOVQ    buf+40(FP), buffer | ||||
|     MOVQ    tbl+48(FP), table | ||||
|  | ||||
|     MOVQ    pbr0+0(FP), br0 | ||||
|     MOVQ    pbr1+8(FP), br1 | ||||
|     MOVQ    pbr2+16(FP), br2 | ||||
|     MOVQ    pbr3+24(FP), br3 | ||||
|  | ||||
| main_loop: | ||||
| {{ define "decode_2_values_x86" }} | ||||
|     // const stream = {{ var "id" }} | ||||
|     // br{{ var "id"}}.fillFast() | ||||
|     MOVBQZX bitReaderShifted_bitsRead(br{{ var "id" }}), br_bits_read | ||||
|     MOVQ    bitReaderShifted_value(br{{ var "id" }}), br_value | ||||
|     MOVQ    bitReaderShifted_off(br{{ var "id" }}), br_offset | ||||
|  | ||||
|     // We must have at least 2 * max tablelog left | ||||
|     CMPQ    br_bits_read, $64-22 | ||||
|     JBE     skip_fill{{ var "id" }} | ||||
|  | ||||
|     SUBQ    $32, br_bits_read       // b.bitsRead -= 32 | ||||
|     SUBQ    $4, br_offset           // b.off -= 4 | ||||
|  | ||||
| 	// v := b.in[b.off-4 : b.off] | ||||
| 	// v = v[:4] | ||||
| 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||||
|     MOVQ    bitReaderShifted_in(br{{ var "id" }}), AX | ||||
|  | ||||
| 	// b.value |= uint64(low) << (b.bitsRead & 63) | ||||
| #ifdef GOAMD64_v3 | ||||
|     SHLXQ   br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63) | ||||
| #else | ||||
|     MOVL    0(br_offset)(AX*1), AX  // AX = uint32(b.in[b.off:b.off+4]) | ||||
|     MOVQ    br_bits_read, CX | ||||
|     SHLQ    CL, AX | ||||
| #endif | ||||
|  | ||||
|     ORQ     AX, br_value | ||||
|  | ||||
|     // exhausted = exhausted || (br{{ var "id"}}.off < 4) | ||||
|     CMPQ    br_offset, $4 | ||||
|     SETLT   DL | ||||
|     ORB     DL, DH | ||||
|     // } | ||||
| skip_fill{{ var "id" }}: | ||||
|  | ||||
|     // val0 := br{{ var "id"}}.peekTopBits(peekBits) | ||||
| #ifdef GOAMD64_v3 | ||||
|     SHRXQ   peek_bits, br_value, AX // AX = (value >> peek_bits) & mask | ||||
| #else | ||||
|     MOVQ    br_value, AX | ||||
|     MOVQ    peek_bits, CX | ||||
|     SHRQ    CL, AX                  // AX = (value >> peek_bits) & mask | ||||
| #endif | ||||
|  | ||||
|     // v0 := table[val0&mask] | ||||
|     MOVW    0(table)(AX*2), AX      // AX - v0 | ||||
|  | ||||
|     // br{{ var "id"}}.advance(uint8(v0.entry)) | ||||
|     MOVB    AH, BL                  // BL = uint8(v0.entry >> 8) | ||||
|  | ||||
| #ifdef GOAMD64_v3 | ||||
|     MOVBQZX AL, CX | ||||
|     SHLXQ   AX, br_value, br_value // value <<= n | ||||
| #else | ||||
|     MOVBQZX AL, CX | ||||
|     SHLQ    CL, br_value            // value <<= n | ||||
| #endif | ||||
|  | ||||
|     ADDQ    CX, br_bits_read        // bits_read += n | ||||
|  | ||||
|  | ||||
| #ifdef GOAMD64_v3 | ||||
|     SHRXQ    peek_bits, br_value, AX  // AX = (value >> peek_bits) & mask | ||||
| #else | ||||
|     // val1 := br{{ var "id"}}.peekTopBits(peekBits) | ||||
|     MOVQ    peek_bits, CX | ||||
|     MOVQ    br_value, AX | ||||
|     SHRQ    CL, AX                  // AX = (value >> peek_bits) & mask | ||||
| #endif | ||||
|  | ||||
|     // v1 := table[val1&mask] | ||||
|     MOVW    0(table)(AX*2), AX      // AX - v1 | ||||
|  | ||||
|     // br{{ var "id"}}.advance(uint8(v1.entry)) | ||||
|     MOVB    AH, BH                  // BH = uint8(v1.entry >> 8) | ||||
|  | ||||
| #ifdef GOAMD64_v3 | ||||
|     MOVBQZX AL, CX | ||||
|     SHLXQ   AX, br_value, br_value // value <<= n | ||||
| #else | ||||
|     MOVBQZX AL, CX | ||||
|     SHLQ    CL, br_value            // value <<= n | ||||
| #endif | ||||
|  | ||||
|     ADDQ    CX, br_bits_read        // bits_read += n | ||||
|  | ||||
|  | ||||
|     // these two writes get coalesced | ||||
|     // buf[stream][off] = uint8(v0.entry >> 8) | ||||
|     // buf[stream][off+1] = uint8(v1.entry >> 8) | ||||
|     MOVW    BX, {{ var "bufofs" }}(buffer)(off*1) | ||||
|  | ||||
|     // update the bit reader structure | ||||
|     MOVB    br_bits_read, bitReaderShifted_bitsRead(br{{ var "id" }}) | ||||
|     MOVQ    br_value, bitReaderShifted_value(br{{ var "id" }}) | ||||
|     MOVQ    br_offset, bitReaderShifted_off(br{{ var "id" }}) | ||||
| {{ end }} | ||||
|  | ||||
|     {{ set "id" "0" }} | ||||
|     {{ set "ofs" "0" }} | ||||
|     {{ set "bufofs" "0" }} {{/* id * bufoff */}} | ||||
|     {{ template "decode_2_values_x86" . }} | ||||
|  | ||||
|     {{ set "id" "1" }} | ||||
|     {{ set "ofs" "8" }} | ||||
|     {{ set "bufofs" "256" }} | ||||
|     {{ template "decode_2_values_x86" . }} | ||||
|  | ||||
|     {{ set "id" "2" }} | ||||
|     {{ set "ofs" "16" }} | ||||
|     {{ set "bufofs" "512" }} | ||||
|     {{ template "decode_2_values_x86" . }} | ||||
|  | ||||
|     {{ set "id" "3" }} | ||||
|     {{ set "ofs" "24" }} | ||||
|     {{ set "bufofs" "768" }} | ||||
|     {{ template "decode_2_values_x86" . }} | ||||
|  | ||||
|     ADDQ    $2, off     // off += 2 | ||||
|  | ||||
|     TESTB   DH, DH      // any br[i].ofs < 4? | ||||
|     JNZ     end | ||||
|  | ||||
|     CMPQ    off, $bufoff | ||||
|     JL      main_loop | ||||
| end: | ||||
|     MOVQ    0(SP), BP | ||||
|  | ||||
|     MOVB    off, ret+56(FP) | ||||
|     RET | ||||
| #undef  off | ||||
| #undef  buffer | ||||
| #undef  table | ||||
|  | ||||
| #undef  br_bits_read | ||||
| #undef  br_value | ||||
| #undef  br_offset | ||||
| #undef  peek_bits | ||||
| #undef  exhausted | ||||
|  | ||||
| #undef  br0 | ||||
| #undef  br1 | ||||
| #undef  br2 | ||||
| #undef  br3 | ||||
							
								
								
									
										193
									
								
								vendor/github.com/klauspost/compress/huff0/decompress_generic.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										193
									
								
								vendor/github.com/klauspost/compress/huff0/decompress_generic.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,193 @@ | ||||
| //go:build !amd64 || appengine || !gc || noasm | ||||
| // +build !amd64 appengine !gc noasm | ||||
|  | ||||
| // This file contains a generic implementation of Decoder.Decompress4X. | ||||
| package huff0 | ||||
|  | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| ) | ||||
|  | ||||
| // Decompress4X will decompress a 4X encoded stream. | ||||
| // The length of the supplied input must match the end of a block exactly. | ||||
| // The *capacity* of the dst slice must match the destination size of | ||||
| // the uncompressed data exactly. | ||||
| // | ||||
| // src starts with a 6 byte "jump table": three little-endian 16 bit lengths | ||||
| // for streams 0-2. Stream 3 is whatever follows the first three streams. | ||||
| // Each stream decodes into its own quarter of dst, a quarter being | ||||
| // (cap(dst)+3)/4 bytes. | ||||
| // | ||||
| // On success dst, resliced to its full capacity, is returned. An error is | ||||
| // returned if no table is loaded, if src is too small or truncated, or if | ||||
| // the streams do not decode to exactly cap(dst) bytes. | ||||
| func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { | ||||
| 	if len(d.dt.single) == 0 { | ||||
| 		return nil, errors.New("no table loaded") | ||||
| 	} | ||||
| 	// 6 bytes of jump table plus at least one byte for each of the 4 streams. | ||||
| 	if len(src) < 6+(4*1) { | ||||
| 		return nil, errors.New("input too small") | ||||
| 	} | ||||
| 	if use8BitTables && d.actualTableLog <= 8 { | ||||
| 		return d.decompress4X8bit(dst, src) | ||||
| 	} | ||||
|  | ||||
| 	var br [4]bitReaderShifted | ||||
| 	// Decode "jump table" | ||||
| 	start := 6 | ||||
| 	for i := 0; i < 3; i++ { | ||||
| 		length := int(src[i*2]) | (int(src[i*2+1]) << 8) | ||||
| 		// ">=" rather than ">": the following stream needs at least one byte. | ||||
| 		if start+length >= len(src) { | ||||
| 			return nil, errors.New("truncated input (or invalid offset)") | ||||
| 		} | ||||
| 		err := br[i].init(src[start : start+length]) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		start += length | ||||
| 	} | ||||
| 	// The last stream has no length entry; it uses the rest of the input. | ||||
| 	err := br[3].init(src[start:]) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	// destination, offset to match first output | ||||
| 	dstSize := cap(dst) | ||||
| 	dst = dst[:dstSize] | ||||
| 	out := dst | ||||
| 	dstEvery := (dstSize + 3) / 4 | ||||
|  | ||||
| 	const tlSize = 1 << tableLogMax | ||||
| 	const tlMask = tlSize - 1 | ||||
| 	single := d.dt.single[:tlSize] | ||||
|  | ||||
| 	// Use temp table to avoid bound checks/append penalty. | ||||
| 	// From here on every return path hands buf back via d.bufs.Put. | ||||
| 	buf := d.buffer() | ||||
| 	// off is a uint8 on purpose: it wraps to 0 after bufoff (256) values, | ||||
| 	// which is the signal to flush the temp table to the output. | ||||
| 	var off uint8 | ||||
| 	var decoded int | ||||
|  | ||||
| 	// Decode 2 values from each decoder/loop. | ||||
| 	const bufoff = 256 | ||||
| 	for { | ||||
| 		// Stop the fast loop once any stream has fewer than 4 bytes left; | ||||
| 		// the remainder is handled by the slower per-stream loop below. | ||||
| 		if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { | ||||
| 			break | ||||
| 		} | ||||
|  | ||||
| 		// Streams 0 and 1, interleaved. | ||||
| 		{ | ||||
| 			const stream = 0 | ||||
| 			const stream2 = 1 | ||||
| 			br[stream].fillFast() | ||||
| 			br[stream2].fillFast() | ||||
|  | ||||
| 			val := br[stream].peekBitsFast(d.actualTableLog) | ||||
| 			val2 := br[stream2].peekBitsFast(d.actualTableLog) | ||||
| 			v := single[val&tlMask] | ||||
| 			v2 := single[val2&tlMask] | ||||
| 			// Low byte of entry is the bit count, high byte the decoded symbol. | ||||
| 			br[stream].advance(uint8(v.entry)) | ||||
| 			br[stream2].advance(uint8(v2.entry)) | ||||
| 			buf[stream][off] = uint8(v.entry >> 8) | ||||
| 			buf[stream2][off] = uint8(v2.entry >> 8) | ||||
|  | ||||
| 			val = br[stream].peekBitsFast(d.actualTableLog) | ||||
| 			val2 = br[stream2].peekBitsFast(d.actualTableLog) | ||||
| 			v = single[val&tlMask] | ||||
| 			v2 = single[val2&tlMask] | ||||
| 			br[stream].advance(uint8(v.entry)) | ||||
| 			br[stream2].advance(uint8(v2.entry)) | ||||
| 			buf[stream][off+1] = uint8(v.entry >> 8) | ||||
| 			buf[stream2][off+1] = uint8(v2.entry >> 8) | ||||
| 		} | ||||
|  | ||||
| 		// Streams 2 and 3, interleaved. | ||||
| 		{ | ||||
| 			const stream = 2 | ||||
| 			const stream2 = 3 | ||||
| 			br[stream].fillFast() | ||||
| 			br[stream2].fillFast() | ||||
|  | ||||
| 			val := br[stream].peekBitsFast(d.actualTableLog) | ||||
| 			val2 := br[stream2].peekBitsFast(d.actualTableLog) | ||||
| 			v := single[val&tlMask] | ||||
| 			v2 := single[val2&tlMask] | ||||
| 			br[stream].advance(uint8(v.entry)) | ||||
| 			br[stream2].advance(uint8(v2.entry)) | ||||
| 			buf[stream][off] = uint8(v.entry >> 8) | ||||
| 			buf[stream2][off] = uint8(v2.entry >> 8) | ||||
|  | ||||
| 			val = br[stream].peekBitsFast(d.actualTableLog) | ||||
| 			val2 = br[stream2].peekBitsFast(d.actualTableLog) | ||||
| 			v = single[val&tlMask] | ||||
| 			v2 = single[val2&tlMask] | ||||
| 			br[stream].advance(uint8(v.entry)) | ||||
| 			br[stream2].advance(uint8(v2.entry)) | ||||
| 			buf[stream][off+1] = uint8(v.entry >> 8) | ||||
| 			buf[stream2][off+1] = uint8(v2.entry >> 8) | ||||
| 		} | ||||
|  | ||||
| 		off += 2 | ||||
|  | ||||
| 		// off wrapped around: all four temp rows are full, flush them. | ||||
| 		if off == 0 { | ||||
| 			if bufoff > dstEvery { | ||||
| 				d.bufs.Put(buf) | ||||
| 				return nil, errors.New("corruption detected: stream overrun 1") | ||||
| 			} | ||||
| 			copy(out, buf[0][:]) | ||||
| 			copy(out[dstEvery:], buf[1][:]) | ||||
| 			copy(out[dstEvery*2:], buf[2][:]) | ||||
| 			copy(out[dstEvery*3:], buf[3][:]) | ||||
| 			out = out[bufoff:] | ||||
| 			decoded += bufoff * 4 | ||||
| 			// There must at least be 3 buffers left. | ||||
| 			if len(out) < dstEvery*3 { | ||||
| 				d.bufs.Put(buf) | ||||
| 				return nil, errors.New("corruption detected: stream overrun 2") | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	// Flush the partially filled temp rows left over from the fast loop. | ||||
| 	if off > 0 { | ||||
| 		ioff := int(off) | ||||
| 		if len(out) < dstEvery*3+ioff { | ||||
| 			d.bufs.Put(buf) | ||||
| 			return nil, errors.New("corruption detected: stream overrun 3") | ||||
| 		} | ||||
| 		copy(out, buf[0][:off]) | ||||
| 		copy(out[dstEvery:], buf[1][:off]) | ||||
| 		copy(out[dstEvery*2:], buf[2][:off]) | ||||
| 		copy(out[dstEvery*3:], buf[3][:off]) | ||||
| 		decoded += int(off) * 4 | ||||
| 		out = out[off:] | ||||
| 	} | ||||
|  | ||||
| 	// Decode remaining. | ||||
| 	remainBytes := dstEvery - (decoded / 4) | ||||
| 	for i := range br { | ||||
| 		offset := dstEvery * i | ||||
| 		endsAt := offset + remainBytes | ||||
| 		// Clamp to the end of the output, which bounds the last stream. | ||||
| 		if endsAt > len(out) { | ||||
| 			endsAt = len(out) | ||||
| 		} | ||||
| 		br := &br[i] | ||||
| 		bitsLeft := br.remaining() | ||||
| 		for bitsLeft > 0 { | ||||
| 			br.fill() | ||||
| 			if offset >= endsAt { | ||||
| 				d.bufs.Put(buf) | ||||
| 				return nil, errors.New("corruption detected: stream overrun 4") | ||||
| 			} | ||||
|  | ||||
| 			// Read value and increment offset. | ||||
| 			val := br.peekBitsFast(d.actualTableLog) | ||||
| 			v := single[val&tlMask].entry | ||||
| 			nBits := uint8(v) | ||||
| 			br.advance(nBits) | ||||
| 			bitsLeft -= uint(nBits) | ||||
| 			out[offset] = uint8(v >> 8) | ||||
| 			offset++ | ||||
| 		} | ||||
| 		if offset != endsAt { | ||||
| 			d.bufs.Put(buf) | ||||
| 			return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) | ||||
| 		} | ||||
| 		decoded += offset - dstEvery*i | ||||
| 		err = br.close() | ||||
| 		if err != nil { | ||||
| 			// Hand the temp table back here too, like every other error path. | ||||
| 			d.bufs.Put(buf) | ||||
| 			return nil, err | ||||
| 		} | ||||
| 	} | ||||
| 	d.bufs.Put(buf) | ||||
| 	if dstSize != decoded { | ||||
| 		return nil, errors.New("corruption detected: short output block") | ||||
| 	} | ||||
| 	return dst, nil | ||||
| } | ||||
							
								
								
									
										2
									
								
								vendor/github.com/klauspost/compress/huff0/huff0.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								vendor/github.com/klauspost/compress/huff0/huff0.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -8,6 +8,7 @@ import ( | ||||
| 	"fmt" | ||||
| 	"math" | ||||
| 	"math/bits" | ||||
| 	"sync" | ||||
|  | ||||
| 	"github.com/klauspost/compress/fse" | ||||
| ) | ||||
| @@ -116,6 +117,7 @@ type Scratch struct { | ||||
| 	nodes          []nodeElt | ||||
| 	tmpOut         [4][]byte | ||||
| 	fse            *fse.Scratch | ||||
| 	decPool        sync.Pool // *[4][256]byte buffers. | ||||
| 	huffWeight     [maxSymbolValue + 1]byte | ||||
| } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Wim
					Wim