feat: Waku v2 bridge

Issue #12610
Michal Iskierko
2023-11-12 13:29:38 +01:00
parent 56e7bd01ca
commit 6d31343205
6716 changed files with 1982502 additions and 5891 deletions

20
vendor/github.com/RoaringBitmap/roaring/.drone.yml generated vendored Normal file

@@ -0,0 +1,20 @@
kind: pipeline
name: default
workspace:
base: /go
path: src/github.com/RoaringBitmap/roaring
steps:
- name: test
image: golang
commands:
- go get -t
- go test
- go test -race -run TestConcurrent*
- go build -tags appengine
- go test -tags appengine
- GOARCH=386 go build
- GOARCH=386 go test
- GOARCH=arm go build
- GOARCH=arm64 go build

5
vendor/github.com/RoaringBitmap/roaring/.gitignore generated vendored Normal file

@@ -0,0 +1,5 @@
*~
roaring-fuzz.zip
workdir
coverage.out
testdata/all3.classic

0
vendor/github.com/RoaringBitmap/roaring/.gitmodules generated vendored Normal file

32
vendor/github.com/RoaringBitmap/roaring/.travis.yml generated vendored Normal file

@@ -0,0 +1,32 @@
language: go
sudo: false
install:
- go get -t github.com/RoaringBitmap/roaring
- go get -t golang.org/x/tools/cmd/cover
- go get -t github.com/mattn/goveralls
- go get -t github.com/mschoch/smat
notifications:
email: false
go:
- "1.13.x"
- "1.14.x"
- tip
# whitelist
branches:
only:
- master
script:
- goveralls -v -service travis-ci -ignore rle16_gen.go,rle_gen.go,rle.go || go test
- go test -race -run TestConcurrent*
- go build -tags appengine
- go test -tags appengine
- GOARCH=arm64 go build
- GOARCH=386 go build
- GOARCH=386 go test
- GOARCH=arm go build
- GOARCH=arm64 go build
matrix:
allow_failures:
- go: tip

11
vendor/github.com/RoaringBitmap/roaring/AUTHORS generated vendored Normal file

@@ -0,0 +1,11 @@
# This is the official list of roaring authors for copyright purposes.
Todd Gruben (@tgruben),
Daniel Lemire (@lemire),
Elliot Murphy (@statik),
Bob Potter (@bpot),
Tyson Maly (@tvmaly),
Will Glynn (@willglynn),
Brent Pedersen (@brentp)
Maciej Biłas (@maciej),
Joe Nall (@joenall)


@@ -0,0 +1,847 @@
package roaring
import (
"fmt"
"github.com/RoaringBitmap/roaring"
"math/bits"
"runtime"
"sync"
"sync/atomic"
)
const (
// Min64BitSigned - Minimum 64 bit value
Min64BitSigned = -9223372036854775808
// Max64BitSigned - Maximum 64 bit value
Max64BitSigned = 9223372036854775807
)
// BSI, at its simplest, is an array of bitmaps that represent an encoded
// binary value. The advantage of a BSI is that comparisons can be made
// across ranges of values whereas a bitmap can only represent the existence
// of a single value for a given column ID. Another usage scenario involves
// storage of high cardinality values.
//
// It depends upon the bitmap libraries. It is not thread safe, so
// upstream concurrency guards must be provided.
type BSI struct {
bA []*roaring.Bitmap
eBM *roaring.Bitmap // Existence BitMap
MaxValue int64
MinValue int64
runOptimized bool
}
// NewBSI constructs a new BSI. Min/Max values are optional. If set to 0
// then the underlying BSI will be automatically sized.
func NewBSI(maxValue int64, minValue int64) *BSI {
bitsz := bits.Len64(uint64(minValue))
if bits.Len64(uint64(maxValue)) > bitsz {
bitsz = bits.Len64(uint64(maxValue))
}
ba := make([]*roaring.Bitmap, bitsz)
for i := 0; i < len(ba); i++ {
ba[i] = roaring.NewBitmap()
}
return &BSI{bA: ba, eBM: roaring.NewBitmap(), MaxValue: maxValue, MinValue: minValue}
}
// NewDefaultBSI constructs an auto-sized BSI
func NewDefaultBSI() *BSI {
return NewBSI(int64(0), int64(0))
}
// RunOptimize attempts to further compress the runs of consecutive values found in the bitmap
func (b *BSI) RunOptimize() {
b.eBM.RunOptimize()
for i := 0; i < len(b.bA); i++ {
b.bA[i].RunOptimize()
}
b.runOptimized = true
}
// HasRunCompression returns true if the bitmap benefits from run compression
func (b *BSI) HasRunCompression() bool {
return b.runOptimized
}
// GetExistenceBitmap returns a pointer to the underlying existence bitmap of the BSI
func (b *BSI) GetExistenceBitmap() *roaring.Bitmap {
return b.eBM
}
// ValueExists tests whether the value exists.
func (b *BSI) ValueExists(columnID uint64) bool {
return b.eBM.Contains(uint32(columnID))
}
// GetCardinality returns a count of unique column IDs for which a value has been set.
func (b *BSI) GetCardinality() uint64 {
return b.eBM.GetCardinality()
}
// BitCount returns the number of bits needed to represent values.
func (b *BSI) BitCount() int {
return len(b.bA)
}
// SetValue sets a value for a given columnID.
func (b *BSI) SetValue(columnID uint64, value int64) {
// If max/min values are set to zero then automatically determine bit array size
if b.MaxValue == 0 && b.MinValue == 0 {
ba := make([]*roaring.Bitmap, bits.Len64(uint64(value)))
for i := len(ba) - b.BitCount(); i > 0; i-- {
b.bA = append(b.bA, roaring.NewBitmap())
if b.runOptimized {
b.bA[i].RunOptimize()
}
}
}
var wg sync.WaitGroup
for i := 0; i < b.BitCount(); i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
if uint64(value)&(1<<uint64(j)) > 0 {
b.bA[j].Add(uint32(columnID))
} else {
b.bA[j].Remove(uint32(columnID))
}
}(i)
}
wg.Wait()
b.eBM.Add(uint32(columnID))
}
// GetValue gets the value at the column ID. Second param will be false for non-existent values.
func (b *BSI) GetValue(columnID uint64) (int64, bool) {
value := int64(0)
exists := b.eBM.Contains(uint32(columnID))
if !exists {
return value, exists
}
for i := 0; i < b.BitCount(); i++ {
if b.bA[i].Contains(uint32(columnID)) {
value |= (1 << uint64(i))
}
}
return int64(value), exists
}
type action func(t *task, batch []uint32, resultsChan chan *roaring.Bitmap, wg *sync.WaitGroup)
func parallelExecutor(parallelism int, t *task, e action,
foundSet *roaring.Bitmap) *roaring.Bitmap {
var n int = parallelism
if n == 0 {
n = runtime.NumCPU()
}
resultsChan := make(chan *roaring.Bitmap, n)
card := foundSet.GetCardinality()
x := card / uint64(n)
remainder := card - (x * uint64(n))
var batch []uint32
var wg sync.WaitGroup
iter := foundSet.ManyIterator()
for i := 0; i < n; i++ {
if i == n-1 {
batch = make([]uint32, x+remainder)
} else {
batch = make([]uint32, x)
}
iter.NextMany(batch)
wg.Add(1)
go e(t, batch, resultsChan, &wg)
}
wg.Wait()
close(resultsChan)
ba := make([]*roaring.Bitmap, 0)
for bm := range resultsChan {
ba = append(ba, bm)
}
return roaring.ParOr(0, ba...)
}
type bsiAction func(input *BSI, batch []uint32, resultsChan chan *BSI, wg *sync.WaitGroup)
func parallelExecutorBSIResults(parallelism int, input *BSI, e bsiAction, foundSet *roaring.Bitmap, sumResults bool) *BSI {
var n int = parallelism
if n == 0 {
n = runtime.NumCPU()
}
resultsChan := make(chan *BSI, n)
card := foundSet.GetCardinality()
x := card / uint64(n)
remainder := card - (x * uint64(n))
var batch []uint32
var wg sync.WaitGroup
iter := foundSet.ManyIterator()
for i := 0; i < n; i++ {
if i == n-1 {
batch = make([]uint32, x+remainder)
} else {
batch = make([]uint32, x)
}
iter.NextMany(batch)
wg.Add(1)
go e(input, batch, resultsChan, &wg)
}
wg.Wait()
close(resultsChan)
ba := make([]*BSI, 0)
for bm := range resultsChan {
ba = append(ba, bm)
}
results := NewDefaultBSI()
if sumResults {
for _, v := range ba {
results.Add(v)
}
} else {
results.ParOr(0, ba...)
}
return results
}
// Operation identifier
type Operation int
const (
// LT less than
LT Operation = 1 + iota
// LE less than or equal
LE
// EQ equal
EQ
// GE greater than or equal
GE
// GT greater than
GT
// RANGE range
RANGE
// MIN find minimum
MIN
// MAX find maximum
MAX
)
type task struct {
bsi *BSI
op Operation
valueOrStart int64
end int64
values map[int64]struct{}
bits *roaring.Bitmap
}
// CompareValue compares value.
// For all operations with the exception of RANGE, the value to be compared is specified by valueOrStart.
// For the RANGE operation, the comparison criterion is >= valueOrStart and <= end.
// The parallelism parameter indicates the number of CPU threads to be applied for processing. A value
// of zero indicates that all available CPU resources will be potentially utilized.
//
func (b *BSI) CompareValue(parallelism int, op Operation, valueOrStart, end int64,
foundSet *roaring.Bitmap) *roaring.Bitmap {
comp := &task{bsi: b, op: op, valueOrStart: valueOrStart, end: end}
if foundSet == nil {
return parallelExecutor(parallelism, comp, compareValue, b.eBM)
}
return parallelExecutor(parallelism, comp, compareValue, foundSet)
}
func compareValue(e *task, batch []uint32, resultsChan chan *roaring.Bitmap, wg *sync.WaitGroup) {
defer wg.Done()
results := roaring.NewBitmap()
if e.bsi.runOptimized {
results.RunOptimize()
}
x := e.bsi.BitCount()
startIsNegative := x == 64 && uint64(e.valueOrStart)&(1<<uint64(x-1)) > 0
endIsNegative := x == 64 && uint64(e.end)&(1<<uint64(x-1)) > 0
for i := 0; i < len(batch); i++ {
cID := batch[i]
eq1, eq2 := true, true
lt1, lt2, gt1 := false, false, false
j := e.bsi.BitCount() - 1
isNegative := false
if x == 64 {
isNegative = e.bsi.bA[j].Contains(cID)
j--
}
compStartValue := e.valueOrStart
compEndValue := e.end
if isNegative != startIsNegative {
compStartValue = ^e.valueOrStart + 1
}
if isNegative != endIsNegative {
compEndValue = ^e.end + 1
}
for ; j >= 0; j-- {
sliceContainsBit := e.bsi.bA[j].Contains(cID)
if uint64(compStartValue)&(1<<uint64(j)) > 0 {
// BIT in value is SET
if !sliceContainsBit {
if eq1 {
if (e.op == GT || e.op == GE || e.op == RANGE) && startIsNegative && !isNegative {
gt1 = true
}
if e.op == LT || e.op == LE {
if !startIsNegative || (startIsNegative == isNegative) {
lt1 = true
}
}
eq1 = false
break
}
}
} else {
// BIT in value is CLEAR
if sliceContainsBit {
if eq1 {
if (e.op == LT || e.op == LE) && isNegative && !startIsNegative {
lt1 = true
}
if e.op == GT || e.op == GE || e.op == RANGE {
if startIsNegative || (startIsNegative == isNegative) {
gt1 = true
}
}
eq1 = false
if e.op != RANGE {
break
}
}
}
}
if e.op == RANGE && uint64(compEndValue)&(1<<uint64(j)) > 0 {
// BIT in value is SET
if !sliceContainsBit {
if eq2 {
if !endIsNegative || (endIsNegative == isNegative) {
lt2 = true
}
eq2 = false
if startIsNegative && !endIsNegative {
break
}
}
}
} else if e.op == RANGE {
// BIT in value is CLEAR
if sliceContainsBit {
if eq2 {
if isNegative && !endIsNegative {
lt2 = true
}
eq2 = false
break
}
}
}
}
switch e.op {
case LT:
if lt1 {
results.Add(cID)
}
case LE:
if lt1 || (eq1 && (!startIsNegative || (startIsNegative && isNegative))) {
results.Add(cID)
}
case EQ:
if eq1 {
results.Add(cID)
}
case GE:
if gt1 || (eq1 && (startIsNegative || (!startIsNegative && !isNegative))) {
results.Add(cID)
}
case GT:
if gt1 {
results.Add(cID)
}
case RANGE:
if (eq1 || gt1) && (eq2 || lt2) {
results.Add(cID)
}
default:
panic(fmt.Sprintf("Unknown operation [%v]", e.op))
}
}
resultsChan <- results
}
// MinMax - Find minimum or maximum value.
func (b *BSI) MinMax(parallelism int, op Operation, foundSet *roaring.Bitmap) int64 {
var n int = parallelism
if n == 0 {
n = runtime.NumCPU()
}
resultsChan := make(chan int64, n)
card := foundSet.GetCardinality()
x := card / uint64(n)
remainder := card - (x * uint64(n))
var batch []uint32
var wg sync.WaitGroup
iter := foundSet.ManyIterator()
for i := 0; i < n; i++ {
if i == n-1 {
batch = make([]uint32, x+remainder)
} else {
batch = make([]uint32, x)
}
iter.NextMany(batch)
wg.Add(1)
go b.minOrMax(op, batch, resultsChan, &wg)
}
wg.Wait()
close(resultsChan)
var minMax int64
if op == MAX {
minMax = Min64BitSigned
} else {
minMax = Max64BitSigned
}
for val := range resultsChan {
if (op == MAX && val > minMax) || (op == MIN && val < minMax) {
minMax = val
}
}
return minMax
}
func (b *BSI) minOrMax(op Operation, batch []uint32, resultsChan chan int64, wg *sync.WaitGroup) {
defer wg.Done()
x := b.BitCount()
var value int64 = Max64BitSigned
if op == MAX {
value = Min64BitSigned
}
for i := 0; i < len(batch); i++ {
cID := batch[i]
eq := true
lt, gt := false, false
j := b.BitCount() - 1
var cVal int64
valueIsNegative := uint64(value)&(1<<uint64(x-1)) > 0 && bits.Len64(uint64(value)) == 64
isNegative := false
if x == 64 {
isNegative = b.bA[j].Contains(cID)
if isNegative {
cVal |= 1 << uint64(j)
}
j--
}
compValue := value
if isNegative != valueIsNegative {
compValue = ^value + 1
}
for ; j >= 0; j-- {
sliceContainsBit := b.bA[j].Contains(cID)
if sliceContainsBit {
cVal |= 1 << uint64(j)
}
if uint64(compValue)&(1<<uint64(j)) > 0 {
// BIT in value is SET
if !sliceContainsBit {
if eq {
eq = false
if op == MAX && valueIsNegative && !isNegative {
gt = true
break
}
if op == MIN && (!valueIsNegative || (valueIsNegative == isNegative)) {
lt = true
}
}
}
} else {
// BIT in value is CLEAR
if sliceContainsBit {
if eq {
eq = false
if op == MIN && isNegative && !valueIsNegative {
lt = true
}
if op == MAX && (valueIsNegative || (valueIsNegative == isNegative)) {
gt = true
}
}
}
}
}
if lt || gt {
value = cVal
}
}
resultsChan <- value
}
// Sum all values contained within the foundSet. As a convenience, the cardinality of the foundSet
// is also returned (for calculating the average).
//
func (b *BSI) Sum(foundSet *roaring.Bitmap) (sum int64, count uint64) {
count = foundSet.GetCardinality()
var wg sync.WaitGroup
for i := 0; i < b.BitCount(); i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
atomic.AddInt64(&sum, int64(foundSet.AndCardinality(b.bA[j])<<uint(j)))
}(i)
}
wg.Wait()
return
}
// Transpose calls b.IntersectAndTranspose(0, b.eBM)
func (b *BSI) Transpose() *roaring.Bitmap {
return b.IntersectAndTranspose(0, b.eBM)
}
// IntersectAndTranspose is a matrix transpose function. Return a bitmap such that the values are represented as column IDs
// in the returned bitmap. This is accomplished by iterating over the foundSet and only including
// the column IDs in the source (foundSet) as compared with this BSI. This can be useful for
// vectoring one set of integers to another.
func (b *BSI) IntersectAndTranspose(parallelism int, foundSet *roaring.Bitmap) *roaring.Bitmap {
trans := &task{bsi: b}
return parallelExecutor(parallelism, trans, transpose, foundSet)
}
func transpose(e *task, batch []uint32, resultsChan chan *roaring.Bitmap, wg *sync.WaitGroup) {
defer wg.Done()
results := roaring.NewBitmap()
if e.bsi.runOptimized {
results.RunOptimize()
}
for _, cID := range batch {
if value, ok := e.bsi.GetValue(uint64(cID)); ok {
results.Add(uint32(value))
}
}
resultsChan <- results
}
// ParOr is intended primarily to be a concatenation function to be used during bulk load operations.
// Care should be taken to make sure that columnIDs do not overlap (unless overlapping values are
// identical).
func (b *BSI) ParOr(parallelism int, bsis ...*BSI) {
// Consolidate sets
bits := len(b.bA)
for i := 0; i < len(bsis); i++ {
if len(bsis[i].bA) > bits {
bits = bsis[i].BitCount()
}
}
// Make sure we have enough bit slices
for bits > b.BitCount() {
newBm := roaring.NewBitmap()
if b.runOptimized {
newBm.RunOptimize()
}
b.bA = append(b.bA, newBm)
}
a := make([][]*roaring.Bitmap, bits)
for i := range a {
a[i] = make([]*roaring.Bitmap, 0)
for _, x := range bsis {
if len(x.bA) > i {
a[i] = append(a[i], x.bA[i])
} else {
a[i] = []*roaring.Bitmap{roaring.NewBitmap()}
if b.runOptimized {
a[i][0].RunOptimize()
}
}
}
}
// Consolidate existence bit maps
ebms := make([]*roaring.Bitmap, len(bsis))
for i := range ebms {
ebms[i] = bsis[i].eBM
}
// First merge all the bit slices from all bsi maps that exist in target
var wg sync.WaitGroup
for i := 0; i < bits; i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
x := []*roaring.Bitmap{b.bA[j]}
x = append(x, a[j]...)
b.bA[j] = roaring.ParOr(parallelism, x...)
}(i)
}
wg.Wait()
// merge all the EBM maps
x := []*roaring.Bitmap{b.eBM}
x = append(x, ebms...)
b.eBM = roaring.ParOr(parallelism, x...)
}
// UnmarshalBinary de-serializes a BSI. The value at bitData[0] is the EBM. Other indices are in least to most
// significance order starting at bitData[1] (bit position 0).
func (b *BSI) UnmarshalBinary(bitData [][]byte) error {
for i := 1; i < len(bitData); i++ {
if bitData == nil || len(bitData[i]) == 0 {
continue
}
if b.BitCount() < i {
newBm := roaring.NewBitmap()
if b.runOptimized {
newBm.RunOptimize()
}
b.bA = append(b.bA, newBm)
}
if err := b.bA[i-1].UnmarshalBinary(bitData[i]); err != nil {
return err
}
if b.runOptimized {
b.bA[i-1].RunOptimize()
}
}
// First element of bitData is the EBM
if bitData[0] == nil {
b.eBM = roaring.NewBitmap()
if b.runOptimized {
b.eBM.RunOptimize()
}
return nil
}
if err := b.eBM.UnmarshalBinary(bitData[0]); err != nil {
return err
}
if b.runOptimized {
b.eBM.RunOptimize()
}
return nil
}
// MarshalBinary serializes a BSI
func (b *BSI) MarshalBinary() ([][]byte, error) {
var err error
data := make([][]byte, b.BitCount()+1)
// Add extra element for EBM (BitCount() + 1)
for i := 1; i < b.BitCount()+1; i++ {
data[i], err = b.bA[i-1].MarshalBinary()
if err != nil {
return nil, err
}
}
// Marshal EBM
data[0], err = b.eBM.MarshalBinary()
if err != nil {
return nil, err
}
return data, nil
}
// BatchEqual returns a bitmap containing the column IDs where the values are contained within the list of values provided.
func (b *BSI) BatchEqual(parallelism int, values []int64) *roaring.Bitmap {
valMap := make(map[int64]struct{}, len(values))
for i := 0; i < len(values); i++ {
valMap[values[i]] = struct{}{}
}
comp := &task{bsi: b, values: valMap}
return parallelExecutor(parallelism, comp, batchEqual, b.eBM)
}
func batchEqual(e *task, batch []uint32, resultsChan chan *roaring.Bitmap,
wg *sync.WaitGroup) {
defer wg.Done()
results := roaring.NewBitmap()
if e.bsi.runOptimized {
results.RunOptimize()
}
for i := 0; i < len(batch); i++ {
cID := batch[i]
if value, ok := e.bsi.GetValue(uint64(cID)); ok {
if _, yes := e.values[int64(value)]; yes {
results.Add(cID)
}
}
}
resultsChan <- results
}
// ClearBits clears the bits that exist in the target if they are also in the found set.
func ClearBits(foundSet, target *roaring.Bitmap) {
iter := foundSet.Iterator()
for iter.HasNext() {
cID := iter.Next()
target.Remove(cID)
}
}
// ClearValues removes the values found in foundSet
func (b *BSI) ClearValues(foundSet *roaring.Bitmap) {
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
ClearBits(foundSet, b.eBM)
}()
for i := 0; i < b.BitCount(); i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
ClearBits(foundSet, b.bA[j])
}(i)
}
wg.Wait()
}
// NewBSIRetainSet - Construct a new BSI from a clone of existing BSI, retain only values contained in foundSet
func (b *BSI) NewBSIRetainSet(foundSet *roaring.Bitmap) *BSI {
newBSI := NewBSI(b.MaxValue, b.MinValue)
newBSI.bA = make([]*roaring.Bitmap, b.BitCount())
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
newBSI.eBM = b.eBM.Clone()
newBSI.eBM.And(foundSet)
}()
for i := 0; i < b.BitCount(); i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
newBSI.bA[j] = b.bA[j].Clone()
newBSI.bA[j].And(foundSet)
}(i)
}
wg.Wait()
return newBSI
}
// Clone performs a deep copy of BSI contents.
func (b *BSI) Clone() *BSI {
return b.NewBSIRetainSet(b.eBM)
}
// Add - In-place sum the contents of another BSI with this BSI, column wise.
func (b *BSI) Add(other *BSI) {
b.eBM.Or(other.eBM)
for i := 0; i < len(other.bA); i++ {
b.addDigit(other.bA[i], i)
}
}
func (b *BSI) addDigit(foundSet *roaring.Bitmap, i int) {
if i >= len(b.bA) {
b.bA = append(b.bA, roaring.NewBitmap())
}
carry := roaring.And(b.bA[i], foundSet)
b.bA[i].Xor(foundSet)
if !carry.IsEmpty() {
if i+1 >= len(b.bA) {
b.bA = append(b.bA, roaring.NewBitmap())
}
b.addDigit(carry, i+1)
}
}
// TransposeWithCounts is a matrix transpose function that returns a BSI that has a columnID system defined by the values
// contained within the input BSI. Given that, for BSIs, different columnIDs can have the same value, TransposeWithCounts
// is useful for situations where there is a one-to-many relationship between the vectored integer sets. The resulting BSI
// contains the number of times a particular value appeared in the input BSI as an integer count.
//
func (b *BSI) TransposeWithCounts(parallelism int, foundSet *roaring.Bitmap) *BSI {
return parallelExecutorBSIResults(parallelism, b, transposeWithCounts, foundSet, true)
}
func transposeWithCounts(input *BSI, batch []uint32, resultsChan chan *BSI, wg *sync.WaitGroup) {
defer wg.Done()
results := NewDefaultBSI()
if input.runOptimized {
results.RunOptimize()
}
for _, cID := range batch {
if value, ok := input.GetValue(uint64(cID)); ok {
if val, ok2 := results.GetValue(uint64(value)); !ok2 {
results.SetValue(uint64(value), 1)
} else {
val++
results.SetValue(uint64(value), val)
}
}
}
resultsChan <- results
}
// Increment - In-place increment of values in a BSI. The foundSet selects the columns to increment.
func (b *BSI) Increment(foundSet *roaring.Bitmap) {
b.addDigit(foundSet, 0)
}
// IncrementAll - In-place increment of all values in a BSI.
func (b *BSI) IncrementAll() {
b.Increment(b.GetExistenceBitmap())
}
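
Below is a hypothetical usage sketch of the BSI API vendored above (not part of the diff itself). It assumes the sketch compiles in the same package as the BSI implementation, since the file's path header is not shown here; the expected outputs in the comments follow from the SetValue/GetValue/CompareValue/Sum definitions above.

```go
// Hypothetical usage sketch; assumes it lives alongside the BSI implementation above.
package roaring

import "fmt"

func ExampleBSI_basicUsage() {
	bsi := NewDefaultBSI() // auto-sized bit slices

	bsi.SetValue(1, 50)  // column 1 -> 50
	bsi.SetValue(2, 150) // column 2 -> 150
	bsi.SetValue(3, 220) // column 3 -> 220

	if v, ok := bsi.GetValue(2); ok {
		fmt.Println("column 2 =", v) // column 2 = 150
	}

	// Columns whose value lies in [0, 100]. Parallelism 0 means "use all CPUs";
	// a nil foundSet means "consider every existing column".
	inRange := bsi.CompareValue(0, RANGE, 0, 100, nil)
	fmt.Println("in [0,100]:", inRange.ToArray()) // in [0,100]: [1]

	sum, count := bsi.Sum(bsi.GetExistenceBitmap())
	fmt.Println("sum =", sum, "count =", count) // sum = 420 count = 3
}
```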

18
vendor/github.com/RoaringBitmap/roaring/CONTRIBUTORS generated vendored Normal file

@@ -0,0 +1,18 @@
# This is the official list of roaring contributors
Todd Gruben (@tgruben),
Daniel Lemire (@lemire),
Elliot Murphy (@statik),
Bob Potter (@bpot),
Tyson Maly (@tvmaly),
Will Glynn (@willglynn),
Brent Pedersen (@brentp),
Jason E. Aten (@glycerine),
Vali Malinoiu (@0x4139),
Forud Ghafouri (@fzerorubigd),
Joe Nall (@joenall),
(@fredim),
Edd Robinson (@e-dard),
Alexander Petrov (@alldroll),
Guy Molinari (@guymolinari),
Ling Jin (@JinLingChristopher)

235
vendor/github.com/RoaringBitmap/roaring/LICENSE generated vendored Normal file

@@ -0,0 +1,235 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2016 by the authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================================================
Portions of runcontainer.go are from the Go standard library, which is licensed
under:
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

202
vendor/github.com/RoaringBitmap/roaring/LICENSE-2.0.txt generated vendored Normal file

@@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2016 by the authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

107
vendor/github.com/RoaringBitmap/roaring/Makefile generated vendored Normal file

@@ -0,0 +1,107 @@
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke ser fetch-real-roaring-datasets
# Display general help about this command
help:
@echo ""
@echo "The following commands are available:"
@echo ""
@echo " make qa : Run all the tests"
@echo " make test : Run the unit tests"
@echo ""
@echo " make format : Format the source code"
@echo " make fmtcheck : Check if the source code has been formatted"
@echo " make vet : Check for suspicious constructs"
@echo " make lint : Check for style errors"
@echo ""
@echo " make deps : Get the dependencies"
@echo " make clean : Remove any build artifact"
@echo " make nuke : Deletes any intermediate file"
@echo ""
@echo " make fuzz-smat : Fuzzy testing with smat"
@echo " make fuzz-stream : Fuzzy testing with stream deserialization"
@echo " make fuzz-buffer : Fuzzy testing with buffer deserialization"
@echo ""
# Alias for help target
all: help
test:
go test
go test -race -run TestConcurrent*
# Format the source code
format:
@find ./ -type f -name "*.go" -exec gofmt -w {} \;
# Check if the source code has been formatted
fmtcheck:
@mkdir -p target
@find ./ -type f -name "*.go" -exec gofmt -d {} \; | tee target/format.diff
@test ! -s target/format.diff || { echo "ERROR: the source code has not been formatted - please use 'make format' or 'gofmt'"; exit 1; }
# Check for syntax errors
vet:
GOPATH=$(GOPATH) go vet ./...
# Check for style errors
lint:
GOPATH=$(GOPATH) PATH=$(GOPATH)/bin:$(PATH) golint ./...
# Alias to run all quality-assurance checks
qa: fmtcheck test vet lint
# --- INSTALL ---
# Get the dependencies
deps:
GOPATH=$(GOPATH) go get github.com/stretchr/testify
GOPATH=$(GOPATH) go get github.com/bits-and-blooms/bitset
GOPATH=$(GOPATH) go get github.com/golang/lint/golint
GOPATH=$(GOPATH) go get github.com/mschoch/smat
GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz
GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz-build
GOPATH=$(GOPATH) go get github.com/glycerine/go-unsnap-stream
GOPATH=$(GOPATH) go get github.com/philhofer/fwd
GOPATH=$(GOPATH) go get github.com/jtolds/gls
fuzz-smat:
go test -tags=gofuzz -run=TestGenerateSmatCorpus
go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
fuzz-stream:
go-fuzz-build -func FuzzSerializationStream github.com/RoaringBitmap/roaring
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
fuzz-buffer:
go-fuzz-build -func FuzzSerializationBuffer github.com/RoaringBitmap/roaring
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
# Remove any build artifact
clean:
GOPATH=$(GOPATH) go clean ./...
# Deletes any intermediate file
nuke:
rm -rf ./target
GOPATH=$(GOPATH) go clean -i ./...
cover:
go test -coverprofile=coverage.out
go tool cover -html=coverage.out
fetch-real-roaring-datasets:
# pull github.com/RoaringBitmap/real-roaring-datasets -> testdata/real-roaring-datasets
git submodule init
git submodule update

405
vendor/github.com/RoaringBitmap/roaring/README.md generated vendored Normal file

@@ -0,0 +1,405 @@
roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https://travis-ci.org/RoaringBitmap/roaring) [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring/roaring64?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring/roaring64) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring)
[![Build Status](https://cloud.drone.io/api/badges/RoaringBitmap/roaring/status.svg)](https://cloud.drone.io/RoaringBitmap/roaring)
![Go-CI](https://github.com/RoaringBitmap/roaring/workflows/Go-CI/badge.svg)
![Go-ARM-CI](https://github.com/RoaringBitmap/roaring/workflows/Go-ARM-CI/badge.svg)
![Go-Windows-CI](https://github.com/RoaringBitmap/roaring/workflows/Go-Windows-CI/badge.svg)
=============
This is a Go version of the Roaring bitmap data structure.
Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing.
[lucene]: https://lucene.apache.org/
[solr]: https://lucene.apache.org/solr/
[elasticsearch]: https://www.elastic.co/products/elasticsearch
[druid]: https://druid.apache.org/
[spark]: https://spark.apache.org/
[opensearchserver]: http://www.opensearchserver.com
[cloudtorrent]: https://github.com/jpillora/cloud-torrent
[whoosh]: https://bitbucket.org/mchaput/whoosh/wiki/Home
[pilosa]: https://www.pilosa.com/
[kylin]: http://kylin.apache.org/
[pinot]: http://github.com/linkedin/pinot/wiki
[vsts]: https://www.visualstudio.com/team-services/
[atlas]: https://github.com/Netflix/atlas
Roaring bitmaps are found to work well in many important applications:
> Use Roaring for bitmap compression whenever possible. Do not use other bitmap compression methods ([Wang et al., SIGMOD 2017](http://db.ucsd.edu/wp-content/uploads/2017/03/sidm338-wangA.pdf))
The ``roaring`` Go library is used by
* [Cloud Torrent](https://github.com/jpillora/cloud-torrent)
* [runv](https://github.com/hyperhq/runv)
* [InfluxDB](https://www.influxdata.com)
* [Pilosa](https://www.pilosa.com/)
* [Bleve](http://www.blevesearch.com)
* [lindb](https://github.com/lindb/lindb)
* [Elasticell](https://github.com/deepfabric/elasticell)
* [SourceGraph](https://github.com/sourcegraph/sourcegraph)
* [M3](https://github.com/m3db/m3)
* [trident](https://github.com/NetApp/trident)
This library is used in production in several systems; it is also part of the [Awesome Go collection](https://awesome-go.com).
There are also [Java](https://github.com/RoaringBitmap/RoaringBitmap) and [C/C++](https://github.com/RoaringBitmap/CRoaring) versions. The Java, C, C++ and Go versions are binary compatible: e.g., you can save bitmaps
from a Java program and load them back in Go, and vice versa. We have a [format specification](https://github.com/RoaringBitmap/RoaringFormatSpec).
This code is licensed under Apache License, Version 2.0 (ASL2.0).
Copyright 2016-... by the authors.
When should you use a bitmap?
===================================
Sets are a fundamental abstraction in
software. They can be implemented in various
ways, as hash sets, as trees, and so forth.
In databases and search engines, sets are often an integral
part of indexes. For example, we may need to maintain a set
of all documents or rows (represented by numerical identifier)
that satisfy some property. Besides adding or removing
elements from the set, we need fast functions
to compute the intersection, the union, the difference between sets, and so on.
To implement a set
of integers, a particularly appealing strategy is the
bitmap (also called bitset or bit vector). Using n bits,
we can represent any set made of the integers from the range
[0,n): the ith bit is set to one if integer i is present in the set.
Commodity processors use words of W=32 or W=64 bits. By combining many such words, we can
support large values of n. Intersections, unions and differences can then be implemented
as bitwise AND, OR and ANDNOT operations.
More complicated set functions can also be implemented as bitwise operations.
When the bitset approach is applicable, it can be orders of
magnitude faster than other possible implementations of a set (e.g., as a hash set)
while using several times less memory.
However, a bitset, even a compressed one, is not always applicable. For example, if
you have 1000 random-looking integers, then a simple array might be the best representation.
We refer to this case as the "sparse" scenario.
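To make the word-level idea above concrete, here is a tiny self-contained illustration (an uncompressed bitset held in a single machine word, not roaring itself): set membership as bits, with intersection, union and difference computed as bitwise AND, OR and AND-NOT.
```go
package main

import "fmt"

func main() {
	// a = {1, 5, 7}, b = {5, 7, 9}, each stored as bits of a 64-bit word.
	var a, b uint64
	for _, i := range []uint{1, 5, 7} {
		a |= 1 << i
	}
	for _, i := range []uint{5, 7, 9} {
		b |= 1 << i
	}
	fmt.Printf("intersection: %b\n", a&b)  // bits 5 and 7 set
	fmt.Printf("union:        %b\n", a|b)  // bits 1, 5, 7, 9 set
	fmt.Printf("difference:   %b\n", a&^b) // only bit 1 set
}
```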
When should you use compressed bitmaps?
===================================
An uncompressed BitSet can use a lot of memory. For example, if you take a BitSet
and set the bit at position 1,000,000 to true, you have just over 100kB. That is over 100kB
to store the position of one bit. This is wasteful even if you do not care about memory:
suppose that you need to compute the intersection between this BitSet and another one
that has the bit at position 1,000,001 set to true, then you need to go through all these zeroes,
whether you like it or not. That can become very wasteful.
This being said, there are definitely cases where attempting to use compressed bitmaps is wasteful.
For example, when you have a small universe size: your bitmaps represent sets of integers
from [0,n) where n is small (e.g., n=64 or n=128). If you are able to use an uncompressed BitSet and
it does not blow up your memory usage, then compressed bitmaps are probably not useful
to you. In fact, if you do not need compression, then a BitSet offers remarkable speed.
The sparse scenario is another use case where compressed bitmaps should not be used.
Keep in mind that random-looking data is usually not compressible. E.g., if you have a small set of
32-bit random integers, it is not mathematically possible to use far less than 32 bits per integer,
and attempts at compression can be counterproductive.
How does Roaring compare with the alternatives?
==================================================
Most alternatives to Roaring are part of a larger family of compressed bitmaps that are run-length-encoded
bitmaps. They identify long runs of 1s or 0s and they represent them with a marker word.
If you have a local mix of 1s and 0s, you use an uncompressed word.
There are many formats in this family:
* Oracle's BBC is an obsolete format at this point: though it may provide good compression,
it is likely much slower than more recent alternatives due to excessive branching.
* WAH is a patented variation on BBC that provides better performance.
* Concise is a variation on the patented WAH. In some specific instances, it can compress
much better than WAH (up to 2x better), but it is generally slower.
* EWAH is both free of patent, and it is faster than all the above. On the downside, it
does not compress quite as well. It is faster because it allows some form of "skipping"
over uncompressed words. So though none of these formats are great at random access, EWAH
is better than the alternatives.
There is, however, a big problem with these formats that can hurt you badly in some cases: there is no random access. If you want to check whether a given value is present in the set, you have to start from the beginning and "uncompress" the whole thing. This means that if you want to intersect a small set with a large set, you still have to uncompress the whole large set in the worst case...
Roaring solves this problem. It works in the following manner. It divides the data into chunks of 2<sup>16</sup> integers
(e.g., [0, 2<sup>16</sup>), [2<sup>16</sup>, 2 x 2<sup>16</sup>), ...). Within a chunk, it can use an uncompressed bitmap, a simple list of integers,
or a list of runs. Whatever format it uses, they all allow you to check for the presence of any one value quickly
(e.g., with a binary search). The net result is that Roaring can compute many operations much faster than run-length-encoded
formats like WAH, EWAH, Concise... Maybe surprisingly, Roaring also generally offers better compression ratios.
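As a rough illustration of the chunking described above (this mirrors the layout in spirit only and is not the library's internal API), a 32-bit value is routed to a container by its high 16 bits, while its low 16 bits give the position inside that container:
```go
package main

import "fmt"

func main() {
	x := uint32(1000000)
	key := uint16(x >> 16)    // which 2^16-sized chunk (container) x falls into
	low := uint16(x & 0xFFFF) // position of x inside that chunk
	fmt.Printf("value %d -> container key %d, low bits %d\n", x, key, low)
	// value 1000000 -> container key 15, low bits 16960
}
```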
### References
- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience 48 (4), 2018 [arXiv:1709.07821](https://arxiv.org/abs/1709.07821)
- Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin,
Better bitmap performance with Roaring bitmaps,
Software: Practice and Experience 46 (5), 2016.
http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/realroaring2014.html
- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience 46 (11), 2016. http://arxiv.org/abs/1603.06549
### Dependencies
Dependencies are fetched automatically by giving the `-t` flag to `go get`.
They include:
- github.com/bits-and-blooms/bitset
- github.com/mschoch/smat
- github.com/glycerine/go-unsnap-stream
- github.com/philhofer/fwd
- github.com/jtolds/gls
Note that the smat library requires Go 1.6 or better.
#### Installation
- go get -t github.com/RoaringBitmap/roaring
### Example
Here is a simplified but complete example:
```go
package main

import (
	"bytes"
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	// example inspired by https://github.com/fzandona/goroar
	fmt.Println("==roaring==")
	rb1 := roaring.BitmapOf(1, 2, 3, 4, 5, 100, 1000)
	fmt.Println(rb1.String())
	rb2 := roaring.BitmapOf(3, 4, 1000)
	fmt.Println(rb2.String())
	rb3 := roaring.New()
	fmt.Println(rb3.String())
	fmt.Println("Cardinality: ", rb1.GetCardinality())
	fmt.Println("Contains 3? ", rb1.Contains(3))
	rb1.And(rb2)
	rb3.Add(1)
	rb3.Add(5)
	rb3.Or(rb1)
	// computes union of the three bitmaps in parallel using 4 workers
	roaring.ParOr(4, rb1, rb2, rb3)
	// computes intersection of the three bitmaps in parallel using 4 workers
	roaring.ParAnd(4, rb1, rb2, rb3)
	// prints 1, 3, 4, 5, 1000
	i := rb3.Iterator()
	for i.HasNext() {
		fmt.Println(i.Next())
	}
	fmt.Println()
	// next we include an example of serialization
	buf := new(bytes.Buffer)
	rb1.WriteTo(buf) // we omit error handling
	newrb := roaring.New()
	newrb.ReadFrom(buf)
	if rb1.Equals(newrb) {
		fmt.Println("I wrote the content to a byte stream and read it back.")
	}
	// you can iterate over bitmaps using ReverseIterator(), Iterator(), ManyIterator()
}
```
If you wish to use serialization and handle errors, you might want to
consider the following sample of code:
```go
rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000)
buf := new(bytes.Buffer)
_, err := rb.WriteTo(buf)
if err != nil {
	t.Errorf("Failed writing")
}
newrb := New()
_, err = newrb.ReadFrom(buf)
if err != nil {
	t.Errorf("Failed reading")
}
if !rb.Equals(newrb) {
	t.Errorf("Cannot retrieve serialized version")
}
```
Given N integers in [0,x), the serialized size in bytes of
a Roaring bitmap should never exceed this bound:
`` 8 + 9 * ((long)x+65535)/65536 + 2 * N ``
That is, given a fixed overhead for the universe size (x), Roaring
bitmaps never use more than 2 bytes per integer. You can call
``BoundSerializedSizeInBytes`` for a more precise estimate.
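As a sanity check, the bound above is easy to compute yourself; the helper below is a minimal sketch (the function name is made up, and for a precise figure you would call ``BoundSerializedSizeInBytes`` instead):
```go
package main

import "fmt"

// serializedSizeBound evaluates the documented worst-case bound (in bytes)
// for a Roaring bitmap holding n integers drawn from [0, x).
// Illustrative only; prefer BoundSerializedSizeInBytes for a tighter estimate.
func serializedSizeBound(n, x uint64) uint64 {
	return 8 + 9*((x+65535)/65536) + 2*n
}

func main() {
	// bound for 1000 values in [0, 2^20): 8 + 9*16 + 2000 = 2152 bytes
	fmt.Println(serializedSizeBound(1000, 1<<20))
}
```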
### 64-bit Roaring
By default, roaring is used to store unsigned 32-bit integers. However, we also offer
an extension dedicated to 64-bit integers. It supports roughly the same functions:
```go
package main

import (
	"bytes"
	"fmt"

	"github.com/RoaringBitmap/roaring/roaring64"
)

func main() {
	// example inspired by https://github.com/fzandona/goroar
	fmt.Println("==roaring64==")
	rb1 := roaring64.BitmapOf(1, 2, 3, 4, 5, 100, 1000)
	fmt.Println(rb1.String())
	rb2 := roaring64.BitmapOf(3, 4, 1000)
	fmt.Println(rb2.String())
	rb3 := roaring64.New()
	fmt.Println(rb3.String())
	fmt.Println("Cardinality: ", rb1.GetCardinality())
	fmt.Println("Contains 3? ", rb1.Contains(3))
	rb1.And(rb2)
	rb3.Add(1)
	rb3.Add(5)
	rb3.Or(rb1)
	// prints 1, 3, 4, 5, 1000
	i := rb3.Iterator()
	for i.HasNext() {
		fmt.Println(i.Next())
	}
	fmt.Println()
	// next we include an example of serialization
	buf := new(bytes.Buffer)
	rb1.WriteTo(buf) // we omit error handling
	newrb := roaring64.New()
	newrb.ReadFrom(buf)
	if rb1.Equals(newrb) {
		fmt.Println("I wrote the content to a byte stream and read it back.")
	}
	// you can iterate over bitmaps using ReverseIterator(), Iterator(), ManyIterator()
}
```
Only the 32-bit roaring format is standard and interoperable between Java, C++, C and Go. There is no guarantee that the 64-bit versions are compatible.
### Documentation
Current documentation is available at http://godoc.org/github.com/RoaringBitmap/roaring and http://godoc.org/github.com/RoaringBitmap/roaring64
### Goroutine safety
In general, it is not safe to access
the same bitmaps from different goroutines--they are left
unsynchronized for performance. Should you want to access
a Bitmap from more than one goroutine, you should
provide synchronization. Typically this is done by using channels to pass
the *Bitmap around (in Go style; so there is only ever one owner),
or by using `sync.Mutex` to serialize operations on Bitmaps.
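For example, one possible (purely illustrative, not library-provided) way to share a bitmap across goroutines is to wrap it with a mutex:
```go
package main

import (
	"fmt"
	"sync"

	"github.com/RoaringBitmap/roaring"
)

// SafeBitmap is a minimal sketch of guarding a roaring.Bitmap with a mutex
// so that several goroutines can add values concurrently. It is not part of
// the roaring library.
type SafeBitmap struct {
	mu sync.Mutex
	bm *roaring.Bitmap
}

func NewSafeBitmap() *SafeBitmap { return &SafeBitmap{bm: roaring.New()} }

func (s *SafeBitmap) Add(x uint32) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.bm.Add(x)
}

func (s *SafeBitmap) Cardinality() uint64 {
	s.mu.Lock()
	defer s.mu.Unlock()
	return s.bm.GetCardinality()
}

func main() {
	s := NewSafeBitmap()
	var wg sync.WaitGroup
	for i := uint32(0); i < 4; i++ {
		wg.Add(1)
		go func(base uint32) {
			defer wg.Done()
			for j := uint32(0); j < 1000; j++ {
				s.Add(base*1000 + j)
			}
		}(i)
	}
	wg.Wait()
	fmt.Println(s.Cardinality()) // 4000
}
```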
### Coverage
We test our software. For a report on our test coverage, see
https://coveralls.io/github/RoaringBitmap/roaring?branch=master
### Benchmark
Type
go test -bench Benchmark -run -
To run benchmarks on [Real Roaring Datasets](https://github.com/RoaringBitmap/real-roaring-datasets)
run the following:
```sh
go get github.com/RoaringBitmap/real-roaring-datasets
BENCH_REAL_DATA=1 go test -bench BenchmarkRealData -run -
```
### Iterative use
You can use roaring with gore:
- go get -u github.com/motemen/gore
- Make sure that ``$GOPATH/bin`` is in your ``$PATH``.
- go get github.com/RoaringBitmap/roaring
```
$ gore
gore version 0.2.6 :help for help
gore> :import github.com/RoaringBitmap/roaring
gore> x:=roaring.New()
gore> x.Add(1)
gore> x.String()
"{1}"
```
### Fuzzy testing
You can help us test the library further with fuzzy testing:
go get github.com/dvyukov/go-fuzz/go-fuzz
go get github.com/dvyukov/go-fuzz/go-fuzz-build
go test -tags=gofuzz -run=TestGenerateSmatCorpus
go-fuzz-build github.com/RoaringBitmap/roaring
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200 -func FuzzSmat
Let it run, and if the # of crashers is > 0, check out the reports in
the workdir where you should be able to find the panic goroutine stack
traces.
You may also replace `-func FuzzSmat` by `-func FuzzSerializationBuffer` or `-func FuzzSerializationStream`.
### Alternative in Go
There is a Go version wrapping the C/C++ implementation https://github.com/RoaringBitmap/gocroaring
For an alternative implementation in Go, see https://github.com/fzandona/goroar
The two versions were written independently.
### Mailing list/discussion group
https://groups.google.com/forum/#!forum/roaring-bitmaps

1022
vendor/github.com/RoaringBitmap/roaring/arraycontainer.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

11
vendor/github.com/RoaringBitmap/roaring/clz.go generated vendored Normal file
View File

@@ -0,0 +1,11 @@
// +build go1.9
// "go1.9", from Go version 1.9 onward
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
package roaring
import "math/bits"
func countLeadingZeros(x uint64) int {
return bits.LeadingZeros64(x)
}

36
vendor/github.com/RoaringBitmap/roaring/clz_compat.go generated vendored Normal file
View File

@@ -0,0 +1,36 @@
// +build !go1.9
package roaring
// countLeadingZeros returns the number of consecutive most significant zero
// bits of x.
func countLeadingZeros(i uint64) int {
if i == 0 {
return 64
}
n := 1
x := uint32(i >> 32)
if x == 0 {
n += 32
x = uint32(i)
}
if (x >> 16) == 0 {
n += 16
x <<= 16
}
if (x >> 24) == 0 {
n += 8
x <<= 8
}
if x>>28 == 0 {
n += 4
x <<= 4
}
if x>>30 == 0 {
n += 2
x <<= 2
}
n -= int(x >> 31)
return n
}

11
vendor/github.com/RoaringBitmap/roaring/ctz.go generated vendored Normal file
View File

@@ -0,0 +1,11 @@
// +build go1.9
// "go1.9", from Go version 1.9 onward
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
package roaring
import "math/bits"
func countTrailingZeros(x uint64) int {
return bits.TrailingZeros64(x)
}

71
vendor/github.com/RoaringBitmap/roaring/ctz_compat.go generated vendored Normal file
View File

@@ -0,0 +1,71 @@
// +build !go1.9
package roaring
// Reuse of portions of go/src/math/big standard lib code
// under this license:
/*
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
const deBruijn32 = 0x077CB531
var deBruijn32Lookup = []byte{
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
}
const deBruijn64 = 0x03f79d71b4ca8b09
var deBruijn64Lookup = []byte{
0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
}
// trailingZeroBits returns the number of consecutive least significant zero
// bits of x.
func countTrailingZeros(x uint64) int {
// x & -x leaves only the right-most bit set in the word. Let k be the
// index of that bit. Since only a single bit is set, the value is two
// to the power of k. Multiplying by a power of two is equivalent to
// left shifting, in this case by k bits. The de Bruijn constant is
// such that all six bit, consecutive substrings are distinct.
// Therefore, if we have a left shifted version of this constant we can
// find by how many bits it was shifted by looking at which six bit
// substring ended up at the top of the word.
// (Knuth, volume 4, section 7.3.1)
if x == 0 {
// We have to special case 0; the formula
// below doesn't work for 0.
return 64
}
return int(deBruijn64Lookup[((x&-x)*(deBruijn64))>>58])
}

View File

@@ -0,0 +1,309 @@
package roaring
import (
"container/heap"
)
// Or function that requires repairAfterLazy
func lazyOR(x1, x2 *Bitmap) *Bitmap {
answer := NewBitmap()
pos1 := 0
pos2 := 0
length1 := x1.highlowcontainer.size()
length2 := x2.highlowcontainer.size()
main:
for (pos1 < length1) && (pos2 < length2) {
s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
for {
if s1 < s2 {
answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1)
pos1++
if pos1 == length1 {
break main
}
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
} else if s1 > s2 {
answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2)
pos2++
if pos2 == length2 {
break main
}
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
} else {
c1 := x1.highlowcontainer.getContainerAtIndex(pos1)
answer.highlowcontainer.appendContainer(s1, c1.lazyOR(x2.highlowcontainer.getContainerAtIndex(pos2)), false)
pos1++
pos2++
if (pos1 == length1) || (pos2 == length2) {
break main
}
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
}
}
}
if pos1 == length1 {
answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
} else if pos2 == length2 {
answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1)
}
return answer
}
// In-place Or function that requires repairAfterLazy
func (x1 *Bitmap) lazyOR(x2 *Bitmap) *Bitmap {
pos1 := 0
pos2 := 0
length1 := x1.highlowcontainer.size()
length2 := x2.highlowcontainer.size()
main:
for (pos1 < length1) && (pos2 < length2) {
s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
for {
if s1 < s2 {
pos1++
if pos1 == length1 {
break main
}
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
} else if s1 > s2 {
x1.highlowcontainer.insertNewKeyValueAt(pos1, s2, x2.highlowcontainer.getContainerAtIndex(pos2).clone())
pos2++
pos1++
length1++
if pos2 == length2 {
break main
}
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
} else {
c1 := x1.highlowcontainer.getWritableContainerAtIndex(pos1)
x1.highlowcontainer.containers[pos1] = c1.lazyIOR(x2.highlowcontainer.getContainerAtIndex(pos2))
x1.highlowcontainer.needCopyOnWrite[pos1] = false
pos1++
pos2++
if (pos1 == length1) || (pos2 == length2) {
break main
}
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
}
}
}
if pos1 == length1 {
x1.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
}
return x1
}
// to be called after lazy aggregates
func (x1 *Bitmap) repairAfterLazy() {
for pos := 0; pos < x1.highlowcontainer.size(); pos++ {
c := x1.highlowcontainer.getContainerAtIndex(pos)
switch c.(type) {
case *bitmapContainer:
if c.(*bitmapContainer).cardinality == invalidCardinality {
c = x1.highlowcontainer.getWritableContainerAtIndex(pos)
c.(*bitmapContainer).computeCardinality()
if c.(*bitmapContainer).getCardinality() <= arrayDefaultMaxSize {
x1.highlowcontainer.setContainerAtIndex(pos, c.(*bitmapContainer).toArrayContainer())
} else if c.(*bitmapContainer).isFull() {
x1.highlowcontainer.setContainerAtIndex(pos, newRunContainer16Range(0, MaxUint16))
}
}
}
}
}
// FastAnd computes the intersection between many bitmaps quickly
// Compared to the And function, it can take many bitmaps as input, thus saving the trouble
// of manually calling "And" many times.
func FastAnd(bitmaps ...*Bitmap) *Bitmap {
if len(bitmaps) == 0 {
return NewBitmap()
} else if len(bitmaps) == 1 {
return bitmaps[0].Clone()
}
answer := And(bitmaps[0], bitmaps[1])
for _, bm := range bitmaps[2:] {
answer.And(bm)
}
return answer
}
// FastOr computes the union of many bitmaps in a single call, saving the trouble
// of calling Or repeatedly. It might also be faster than calling Or repeatedly.
func FastOr(bitmaps ...*Bitmap) *Bitmap {
if len(bitmaps) == 0 {
return NewBitmap()
} else if len(bitmaps) == 1 {
return bitmaps[0].Clone()
}
answer := lazyOR(bitmaps[0], bitmaps[1])
for _, bm := range bitmaps[2:] {
answer = answer.lazyOR(bm)
}
// here is where repairAfterLazy is called.
answer.repairAfterLazy()
return answer
}
// HeapOr computes the union between many bitmaps quickly using a heap.
// It might be faster than calling Or repeatedly.
func HeapOr(bitmaps ...*Bitmap) *Bitmap {
if len(bitmaps) == 0 {
return NewBitmap()
}
// TODO: for better speed, we could do the operation lazily, see Java implementation
pq := make(priorityQueue, len(bitmaps))
for i, bm := range bitmaps {
pq[i] = &item{bm, i}
}
heap.Init(&pq)
for pq.Len() > 1 {
x1 := heap.Pop(&pq).(*item)
x2 := heap.Pop(&pq).(*item)
heap.Push(&pq, &item{Or(x1.value, x2.value), 0})
}
return heap.Pop(&pq).(*item).value
}
// HeapXor computes the symmetric difference between many bitmaps quickly (as opposed to calling Xor repeatedly).
// Internally, this function uses a heap.
// It might be faster than calling Xor repeatedly.
func HeapXor(bitmaps ...*Bitmap) *Bitmap {
if len(bitmaps) == 0 {
return NewBitmap()
}
pq := make(priorityQueue, len(bitmaps))
for i, bm := range bitmaps {
pq[i] = &item{bm, i}
}
heap.Init(&pq)
for pq.Len() > 1 {
x1 := heap.Pop(&pq).(*item)
x2 := heap.Pop(&pq).(*item)
heap.Push(&pq, &item{Xor(x1.value, x2.value), 0})
}
return heap.Pop(&pq).(*item).value
}
// AndAny provides a result equivalent to x1.And(FastOr(bitmaps)).
// It's optimized to minimize allocations. It also might be faster than separate calls.
func (x1 *Bitmap) AndAny(bitmaps ...*Bitmap) {
if len(bitmaps) == 0 {
return
} else if len(bitmaps) == 1 {
x1.And(bitmaps[0])
return
}
type withPos struct {
bitmap *roaringArray
pos int
key uint16
}
filters := make([]withPos, 0, len(bitmaps))
for _, b := range bitmaps {
if b.highlowcontainer.size() > 0 {
filters = append(filters, withPos{
bitmap: &b.highlowcontainer,
pos: 0,
key: b.highlowcontainer.getKeyAtIndex(0),
})
}
}
basePos := 0
intersections := 0
keyContainers := make([]container, 0, len(filters))
var (
tmpArray *arrayContainer
tmpBitmap *bitmapContainer
minNextKey uint16
)
for basePos < x1.highlowcontainer.size() && len(filters) > 0 {
baseKey := x1.highlowcontainer.getKeyAtIndex(basePos)
// accumulate containers for current key, find next minimal key in filters
// and exclude filters that do not have related values anymore
i := 0
maxPossibleOr := 0
minNextKey = MaxUint16
for _, f := range filters {
if f.key < baseKey {
f.pos = f.bitmap.advanceUntil(baseKey, f.pos)
if f.pos == f.bitmap.size() {
continue
}
f.key = f.bitmap.getKeyAtIndex(f.pos)
}
if f.key == baseKey {
cont := f.bitmap.getContainerAtIndex(f.pos)
keyContainers = append(keyContainers, cont)
maxPossibleOr += cont.getCardinality()
f.pos++
if f.pos == f.bitmap.size() {
continue
}
f.key = f.bitmap.getKeyAtIndex(f.pos)
}
minNextKey = minOfUint16(minNextKey, f.key)
filters[i] = f
i++
}
filters = filters[:i]
if len(keyContainers) == 0 {
basePos = x1.highlowcontainer.advanceUntil(minNextKey, basePos)
continue
}
var ored container
if len(keyContainers) == 1 {
ored = keyContainers[0]
} else {
//TODO: special case for run containers?
if maxPossibleOr > arrayDefaultMaxSize {
if tmpBitmap == nil {
tmpBitmap = newBitmapContainer()
}
tmpBitmap.resetTo(keyContainers[0])
ored = tmpBitmap
} else {
if tmpArray == nil {
tmpArray = newArrayContainerCapacity(maxPossibleOr)
}
tmpArray.realloc(maxPossibleOr)
tmpArray.resetTo(keyContainers[0])
ored = tmpArray
}
for _, c := range keyContainers[1:] {
ored = ored.ior(c)
}
}
result := x1.highlowcontainer.getWritableContainerAtIndex(basePos).iand(ored)
if !result.isEmpty() {
x1.highlowcontainer.replaceKeyAndContainerAtIndex(intersections, baseKey, result, false)
intersections++
}
keyContainers = keyContainers[:0]
basePos = x1.highlowcontainer.advanceUntil(minNextKey, basePos)
}
x1.highlowcontainer.resize(intersections)
}

View File

@@ -0,0 +1,166 @@
package internal
import (
"encoding/binary"
"io"
)
// ByteInput typed interface around io.Reader or raw bytes
type ByteInput interface {
// Next returns a slice containing the next n bytes from the buffer,
// advancing the buffer as if the bytes had been returned by Read.
Next(n int) ([]byte, error)
// ReadUInt32 reads uint32 with LittleEndian order
ReadUInt32() (uint32, error)
// ReadUInt16 reads uint16 with LittleEndian order
ReadUInt16() (uint16, error)
// GetReadBytes returns read bytes
GetReadBytes() int64
// SkipBytes skips exactly n bytes
SkipBytes(n int) error
}
// NewByteInputFromReader creates reader wrapper
func NewByteInputFromReader(reader io.Reader) ByteInput {
return &ByteInputAdapter{
r: reader,
readBytes: 0,
}
}
// NewByteInput creates raw bytes wrapper
func NewByteInput(buf []byte) ByteInput {
return &ByteBuffer{
buf: buf,
off: 0,
}
}
// ByteBuffer raw bytes wrapper
type ByteBuffer struct {
buf []byte
off int
}
// Next returns a slice containing the next n bytes from the reader
// If there are fewer bytes than the given n, io.ErrUnexpectedEOF will be returned
func (b *ByteBuffer) Next(n int) ([]byte, error) {
m := len(b.buf) - b.off
if n > m {
return nil, io.ErrUnexpectedEOF
}
data := b.buf[b.off : b.off+n]
b.off += n
return data, nil
}
// ReadUInt32 reads uint32 with LittleEndian order
func (b *ByteBuffer) ReadUInt32() (uint32, error) {
if len(b.buf)-b.off < 4 {
return 0, io.ErrUnexpectedEOF
}
v := binary.LittleEndian.Uint32(b.buf[b.off:])
b.off += 4
return v, nil
}
// ReadUInt16 reads uint16 with LittleEndian order
func (b *ByteBuffer) ReadUInt16() (uint16, error) {
if len(b.buf)-b.off < 2 {
return 0, io.ErrUnexpectedEOF
}
v := binary.LittleEndian.Uint16(b.buf[b.off:])
b.off += 2
return v, nil
}
// GetReadBytes returns read bytes
func (b *ByteBuffer) GetReadBytes() int64 {
return int64(b.off)
}
// SkipBytes skips exactly n bytes
func (b *ByteBuffer) SkipBytes(n int) error {
m := len(b.buf) - b.off
if n > m {
return io.ErrUnexpectedEOF
}
b.off += n
return nil
}
// Reset resets the given buffer with a new byte slice
func (b *ByteBuffer) Reset(buf []byte) {
b.buf = buf
b.off = 0
}
// ByteInputAdapter reader wrapper
type ByteInputAdapter struct {
r io.Reader
readBytes int
}
// Next returns a slice containing the next n bytes from the buffer,
// advancing the buffer as if the bytes had been returned by Read.
func (b *ByteInputAdapter) Next(n int) ([]byte, error) {
buf := make([]byte, n)
m, err := io.ReadAtLeast(b.r, buf, n)
b.readBytes += m
if err != nil {
return nil, err
}
return buf, nil
}
// ReadUInt32 reads uint32 with LittleEndian order
func (b *ByteInputAdapter) ReadUInt32() (uint32, error) {
buf, err := b.Next(4)
if err != nil {
return 0, err
}
return binary.LittleEndian.Uint32(buf), nil
}
// ReadUInt16 reads uint16 with LittleEndian order
func (b *ByteInputAdapter) ReadUInt16() (uint16, error) {
buf, err := b.Next(2)
if err != nil {
return 0, err
}
return binary.LittleEndian.Uint16(buf), nil
}
// GetReadBytes returns read bytes
func (b *ByteInputAdapter) GetReadBytes() int64 {
return int64(b.readBytes)
}
// SkipBytes skips exactly n bytes
func (b *ByteInputAdapter) SkipBytes(n int) error {
_, err := b.Next(n)
return err
}
// Reset resets the given buffer with a new stream
func (b *ByteInputAdapter) Reset(stream io.Reader) {
b.r = stream
b.readBytes = 0
}

View File

@@ -0,0 +1,21 @@
package internal
import (
"sync"
)
var (
// ByteInputAdapterPool shared pool
ByteInputAdapterPool = sync.Pool{
New: func() interface{} {
return &ByteInputAdapter{}
},
}
// ByteBufferPool shared pool
ByteBufferPool = sync.Pool{
New: func() interface{} {
return &ByteBuffer{}
},
}
)

View File

@@ -0,0 +1,32 @@
package roaring
type manyIterable interface {
nextMany(hs uint32, buf []uint32) int
nextMany64(hs uint64, buf []uint64) int
}
func (si *shortIterator) nextMany(hs uint32, buf []uint32) int {
n := 0
l := si.loc
s := si.slice
for n < len(buf) && l < len(s) {
buf[n] = uint32(s[l]) | hs
l++
n++
}
si.loc = l
return n
}
func (si *shortIterator) nextMany64(hs uint64, buf []uint64) int {
n := 0
l := si.loc
s := si.slice
for n < len(buf) && l < len(s) {
buf[n] = uint64(s[l]) | hs
l++
n++
}
si.loc = l
return n
}

612
vendor/github.com/RoaringBitmap/roaring/parallel.go generated vendored Normal file
View File

@@ -0,0 +1,612 @@
package roaring
import (
"container/heap"
"fmt"
"runtime"
"sync"
)
var defaultWorkerCount = runtime.NumCPU()
type bitmapContainerKey struct {
key uint16
idx int
bitmap *Bitmap
}
type multipleContainers struct {
key uint16
containers []container
idx int
}
type keyedContainer struct {
key uint16
container container
idx int
}
type bitmapContainerHeap []bitmapContainerKey
func (h bitmapContainerHeap) Len() int { return len(h) }
func (h bitmapContainerHeap) Less(i, j int) bool { return h[i].key < h[j].key }
func (h bitmapContainerHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
func (h *bitmapContainerHeap) Push(x interface{}) {
// Push and Pop use pointer receivers because they modify the slice's length,
// not just its contents.
*h = append(*h, x.(bitmapContainerKey))
}
func (h *bitmapContainerHeap) Pop() interface{} {
old := *h
n := len(old)
x := old[n-1]
*h = old[0 : n-1]
return x
}
func (h bitmapContainerHeap) Peek() bitmapContainerKey {
return h[0]
}
func (h *bitmapContainerHeap) popIncrementing() (key uint16, container container) {
k := h.Peek()
key = k.key
container = k.bitmap.highlowcontainer.containers[k.idx]
newIdx := k.idx + 1
if newIdx < k.bitmap.highlowcontainer.size() {
k = bitmapContainerKey{
k.bitmap.highlowcontainer.keys[newIdx],
newIdx,
k.bitmap,
}
(*h)[0] = k
heap.Fix(h, 0)
} else {
heap.Pop(h)
}
return
}
func (h *bitmapContainerHeap) Next(containers []container) multipleContainers {
if h.Len() == 0 {
return multipleContainers{}
}
key, container := h.popIncrementing()
containers = append(containers, container)
for h.Len() > 0 && key == h.Peek().key {
_, container = h.popIncrementing()
containers = append(containers, container)
}
return multipleContainers{
key,
containers,
-1,
}
}
func newBitmapContainerHeap(bitmaps ...*Bitmap) bitmapContainerHeap {
// Initialize heap
var h bitmapContainerHeap = make([]bitmapContainerKey, 0, len(bitmaps))
for _, bitmap := range bitmaps {
if !bitmap.IsEmpty() {
key := bitmapContainerKey{
bitmap.highlowcontainer.keys[0],
0,
bitmap,
}
h = append(h, key)
}
}
heap.Init(&h)
return h
}
func repairAfterLazy(c container) container {
switch t := c.(type) {
case *bitmapContainer:
if t.cardinality == invalidCardinality {
t.computeCardinality()
}
if t.getCardinality() <= arrayDefaultMaxSize {
return t.toArrayContainer()
} else if c.(*bitmapContainer).isFull() {
return newRunContainer16Range(0, MaxUint16)
}
}
return c
}
func toBitmapContainer(c container) container {
switch t := c.(type) {
case *arrayContainer:
return t.toBitmapContainer()
case *runContainer16:
if !t.isFull() {
return t.toBitmapContainer()
}
}
return c
}
func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) {
expectedKeys := -1
appendedKeys := 0
var keys []uint16
var containers []container
for appendedKeys != expectedKeys {
select {
case item := <-resultChan:
if len(keys) <= item.idx {
keys = append(keys, make([]uint16, item.idx-len(keys)+1)...)
containers = append(containers, make([]container, item.idx-len(containers)+1)...)
}
keys[item.idx] = item.key
containers[item.idx] = item.container
appendedKeys++
case msg := <-expectedKeysChan:
expectedKeys = msg
}
}
answer := &Bitmap{
roaringArray{
make([]uint16, 0, expectedKeys),
make([]container, 0, expectedKeys),
make([]bool, 0, expectedKeys),
false,
},
}
for i := range keys {
if containers[i] != nil { // in case a resulting container was empty, see ParAnd function
answer.highlowcontainer.appendContainer(keys[i], containers[i], false)
}
}
bitmapChan <- answer
}
// ParHeapOr computes the union (OR) of all provided bitmaps in parallel,
// where the parameter "parallelism" determines how many workers are to be used
// (if it is set to 0, a default number of workers is chosen)
// ParHeapOr uses a heap to compute the union. For rare cases it might be faster than ParOr
func ParHeapOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
bitmapCount := len(bitmaps)
if bitmapCount == 0 {
return NewBitmap()
} else if bitmapCount == 1 {
return bitmaps[0].Clone()
}
if parallelism == 0 {
parallelism = defaultWorkerCount
}
h := newBitmapContainerHeap(bitmaps...)
bitmapChan := make(chan *Bitmap)
inputChan := make(chan multipleContainers, 128)
resultChan := make(chan keyedContainer, 32)
expectedKeysChan := make(chan int)
pool := sync.Pool{
New: func() interface{} {
return make([]container, 0, len(bitmaps))
},
}
orFunc := func() {
// Assumes only structs with >=2 containers are passed
for input := range inputChan {
c := toBitmapContainer(input.containers[0]).lazyOR(input.containers[1])
for _, next := range input.containers[2:] {
c = c.lazyIOR(next)
}
c = repairAfterLazy(c)
kx := keyedContainer{
input.key,
c,
input.idx,
}
resultChan <- kx
pool.Put(input.containers[:0])
}
}
go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)
for i := 0; i < parallelism; i++ {
go orFunc()
}
idx := 0
for h.Len() > 0 {
ck := h.Next(pool.Get().([]container))
if len(ck.containers) == 1 {
resultChan <- keyedContainer{
ck.key,
ck.containers[0],
idx,
}
pool.Put(ck.containers[:0])
} else {
ck.idx = idx
inputChan <- ck
}
idx++
}
expectedKeysChan <- idx
bitmap := <-bitmapChan
close(inputChan)
close(resultChan)
close(expectedKeysChan)
return bitmap
}
// ParAnd computes the intersection (AND) of all provided bitmaps in parallel,
// where the parameter "parallelism" determines how many workers are to be used
// (if it is set to 0, a default number of workers is chosen)
func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap {
bitmapCount := len(bitmaps)
if bitmapCount == 0 {
return NewBitmap()
} else if bitmapCount == 1 {
return bitmaps[0].Clone()
}
if parallelism == 0 {
parallelism = defaultWorkerCount
}
h := newBitmapContainerHeap(bitmaps...)
bitmapChan := make(chan *Bitmap)
inputChan := make(chan multipleContainers, 128)
resultChan := make(chan keyedContainer, 32)
expectedKeysChan := make(chan int)
andFunc := func() {
// Assumes only structs with >=2 containers are passed
for input := range inputChan {
c := input.containers[0].and(input.containers[1])
for _, next := range input.containers[2:] {
if c.isEmpty() {
break
}
c = c.iand(next)
}
// Send a nil explicitly if the result of the intersection is an empty container
if c.isEmpty() {
c = nil
}
kx := keyedContainer{
input.key,
c,
input.idx,
}
resultChan <- kx
}
}
go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)
for i := 0; i < parallelism; i++ {
go andFunc()
}
idx := 0
for h.Len() > 0 {
ck := h.Next(make([]container, 0, 4))
if len(ck.containers) == bitmapCount {
ck.idx = idx
inputChan <- ck
idx++
}
}
expectedKeysChan <- idx
bitmap := <-bitmapChan
close(inputChan)
close(resultChan)
close(expectedKeysChan)
return bitmap
}
// ParOr computes the union (OR) of all provided bitmaps in parallel,
// where the parameter "parallelism" determines how many workers are to be used
// (if it is set to 0, a default number of workers is chosen)
func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
var lKey uint16 = MaxUint16
var hKey uint16
bitmapsFiltered := bitmaps[:0]
for _, b := range bitmaps {
if !b.IsEmpty() {
bitmapsFiltered = append(bitmapsFiltered, b)
}
}
bitmaps = bitmapsFiltered
for _, b := range bitmaps {
lKey = minOfUint16(lKey, b.highlowcontainer.keys[0])
hKey = maxOfUint16(hKey, b.highlowcontainer.keys[b.highlowcontainer.size()-1])
}
if lKey == MaxUint16 && hKey == 0 {
return New()
} else if len(bitmaps) == 1 {
return bitmaps[0].Clone()
}
keyRange := int(hKey) - int(lKey) + 1
if keyRange == 1 {
// revert to FastOr: the keys span a single container key, so
// no container-level aggregation parallelism is achievable
return FastOr(bitmaps...)
}
if parallelism == 0 {
parallelism = defaultWorkerCount
}
var chunkSize int
var chunkCount int
if parallelism*4 > int(keyRange) {
chunkSize = 1
chunkCount = int(keyRange)
} else {
chunkCount = parallelism * 4
chunkSize = (int(keyRange) + chunkCount - 1) / chunkCount
}
if chunkCount*chunkSize < int(keyRange) {
// it's fine to panic to indicate an implementation error
panic(fmt.Sprintf("invariant check failed: chunkCount * chunkSize < keyRange, %d * %d < %d", chunkCount, chunkSize, keyRange))
}
chunks := make([]*roaringArray, chunkCount)
chunkSpecChan := make(chan parChunkSpec, minOfInt(maxOfInt(64, 2*parallelism), int(chunkCount)))
chunkChan := make(chan parChunk, minOfInt(32, int(chunkCount)))
orFunc := func() {
for spec := range chunkSpecChan {
ra := lazyOrOnRange(&bitmaps[0].highlowcontainer, &bitmaps[1].highlowcontainer, spec.start, spec.end)
for _, b := range bitmaps[2:] {
ra = lazyIOrOnRange(ra, &b.highlowcontainer, spec.start, spec.end)
}
for i, c := range ra.containers {
ra.containers[i] = repairAfterLazy(c)
}
chunkChan <- parChunk{ra, spec.idx}
}
}
for i := 0; i < parallelism; i++ {
go orFunc()
}
go func() {
for i := 0; i < chunkCount; i++ {
spec := parChunkSpec{
start: uint16(int(lKey) + i*chunkSize),
end: uint16(minOfInt(int(lKey)+(i+1)*chunkSize-1, int(hKey))),
idx: int(i),
}
chunkSpecChan <- spec
}
}()
chunksRemaining := chunkCount
for chunk := range chunkChan {
chunks[chunk.idx] = chunk.ra
chunksRemaining--
if chunksRemaining == 0 {
break
}
}
close(chunkChan)
close(chunkSpecChan)
containerCount := 0
for _, chunk := range chunks {
containerCount += chunk.size()
}
result := Bitmap{
roaringArray{
containers: make([]container, containerCount),
keys: make([]uint16, containerCount),
needCopyOnWrite: make([]bool, containerCount),
},
}
resultOffset := 0
for _, chunk := range chunks {
copy(result.highlowcontainer.containers[resultOffset:], chunk.containers)
copy(result.highlowcontainer.keys[resultOffset:], chunk.keys)
copy(result.highlowcontainer.needCopyOnWrite[resultOffset:], chunk.needCopyOnWrite)
resultOffset += chunk.size()
}
return &result
}
type parChunkSpec struct {
start uint16
end uint16
idx int
}
type parChunk struct {
ra *roaringArray
idx int
}
func (c parChunk) size() int {
return c.ra.size()
}
func parNaiveStartAt(ra *roaringArray, start uint16, last uint16) int {
for idx, key := range ra.keys {
if key >= start && key <= last {
return idx
} else if key > last {
break
}
}
return ra.size()
}
func lazyOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
answer := newRoaringArray()
length1 := ra1.size()
length2 := ra2.size()
idx1 := parNaiveStartAt(ra1, start, last)
idx2 := parNaiveStartAt(ra2, start, last)
var key1 uint16
var key2 uint16
if idx1 < length1 && idx2 < length2 {
key1 = ra1.getKeyAtIndex(idx1)
key2 = ra2.getKeyAtIndex(idx2)
for key1 <= last && key2 <= last {
if key1 < key2 {
answer.appendCopy(*ra1, idx1)
idx1++
if idx1 == length1 {
break
}
key1 = ra1.getKeyAtIndex(idx1)
} else if key1 > key2 {
answer.appendCopy(*ra2, idx2)
idx2++
if idx2 == length2 {
break
}
key2 = ra2.getKeyAtIndex(idx2)
} else {
c1 := ra1.getFastContainerAtIndex(idx1, false)
answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false)
idx1++
idx2++
if idx1 == length1 || idx2 == length2 {
break
}
key1 = ra1.getKeyAtIndex(idx1)
key2 = ra2.getKeyAtIndex(idx2)
}
}
}
if idx2 < length2 {
key2 = ra2.getKeyAtIndex(idx2)
for key2 <= last {
answer.appendCopy(*ra2, idx2)
idx2++
if idx2 == length2 {
break
}
key2 = ra2.getKeyAtIndex(idx2)
}
}
if idx1 < length1 {
key1 = ra1.getKeyAtIndex(idx1)
for key1 <= last {
answer.appendCopy(*ra1, idx1)
idx1++
if idx1 == length1 {
break
}
key1 = ra1.getKeyAtIndex(idx1)
}
}
return answer
}
func lazyIOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
length1 := ra1.size()
length2 := ra2.size()
idx1 := 0
idx2 := parNaiveStartAt(ra2, start, last)
var key1 uint16
var key2 uint16
if idx1 < length1 && idx2 < length2 {
key1 = ra1.getKeyAtIndex(idx1)
key2 = ra2.getKeyAtIndex(idx2)
for key1 <= last && key2 <= last {
if key1 < key2 {
idx1++
if idx1 >= length1 {
break
}
key1 = ra1.getKeyAtIndex(idx1)
} else if key1 > key2 {
ra1.insertNewKeyValueAt(idx1, key2, ra2.getContainerAtIndex(idx2))
ra1.needCopyOnWrite[idx1] = true
idx2++
idx1++
length1++
if idx2 >= length2 {
break
}
key2 = ra2.getKeyAtIndex(idx2)
} else {
c1 := ra1.getFastContainerAtIndex(idx1, true)
ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2))
ra1.needCopyOnWrite[idx1] = false
idx1++
idx2++
if idx1 >= length1 || idx2 >= length2 {
break
}
key1 = ra1.getKeyAtIndex(idx1)
key2 = ra2.getKeyAtIndex(idx2)
}
}
}
if idx2 < length2 {
key2 = ra2.getKeyAtIndex(idx2)
for key2 <= last {
ra1.appendCopy(*ra2, idx2)
idx2++
if idx2 >= length2 {
break
}
key2 = ra2.getKeyAtIndex(idx2)
}
}
return ra1
}

11
vendor/github.com/RoaringBitmap/roaring/popcnt.go generated vendored Normal file
View File

@@ -0,0 +1,11 @@
// +build go1.9
// "go1.9", from Go version 1.9 onward
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
package roaring
import "math/bits"
func popcount(x uint64) uint64 {
return uint64(bits.OnesCount64(x))
}

103
vendor/github.com/RoaringBitmap/roaring/popcnt_amd64.s generated vendored Normal file
View File

@@ -0,0 +1,103 @@
// +build amd64,!appengine,!go1.9
TEXT ·hasAsm(SB),4,$0-1
MOVQ $1, AX
CPUID
SHRQ $23, CX
ANDQ $1, CX
MOVB CX, ret+0(FP)
RET
#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2
TEXT ·popcntSliceAsm(SB),4,$0-32
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntSliceEnd
popcntSliceLoop:
BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX
ADDQ DX, AX
ADDQ $8, SI
LOOP popcntSliceLoop
popcntSliceEnd:
MOVQ AX, ret+24(FP)
RET
TEXT ·popcntMaskSliceAsm(SB),4,$0-56
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntMaskSliceEnd
MOVQ m+24(FP), DI
popcntMaskSliceLoop:
MOVQ (DI), DX
NOTQ DX
ANDQ (SI), DX
POPCNTQ_DX_DX
ADDQ DX, AX
ADDQ $8, SI
ADDQ $8, DI
LOOP popcntMaskSliceLoop
popcntMaskSliceEnd:
MOVQ AX, ret+48(FP)
RET
TEXT ·popcntAndSliceAsm(SB),4,$0-56
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntAndSliceEnd
MOVQ m+24(FP), DI
popcntAndSliceLoop:
MOVQ (DI), DX
ANDQ (SI), DX
POPCNTQ_DX_DX
ADDQ DX, AX
ADDQ $8, SI
ADDQ $8, DI
LOOP popcntAndSliceLoop
popcntAndSliceEnd:
MOVQ AX, ret+48(FP)
RET
TEXT ·popcntOrSliceAsm(SB),4,$0-56
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntOrSliceEnd
MOVQ m+24(FP), DI
popcntOrSliceLoop:
MOVQ (DI), DX
ORQ (SI), DX
POPCNTQ_DX_DX
ADDQ DX, AX
ADDQ $8, SI
ADDQ $8, DI
LOOP popcntOrSliceLoop
popcntOrSliceEnd:
MOVQ AX, ret+48(FP)
RET
TEXT ·popcntXorSliceAsm(SB),4,$0-56
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntXorSliceEnd
MOVQ m+24(FP), DI
popcntXorSliceLoop:
MOVQ (DI), DX
XORQ (SI), DX
POPCNTQ_DX_DX
ADDQ DX, AX
ADDQ $8, SI
ADDQ $8, DI
LOOP popcntXorSliceLoop
popcntXorSliceEnd:
MOVQ AX, ret+48(FP)
RET

67
vendor/github.com/RoaringBitmap/roaring/popcnt_asm.go generated vendored Normal file
View File

@@ -0,0 +1,67 @@
// +build amd64,!appengine,!go1.9
package roaring
// *** the following functions are defined in popcnt_amd64.s
//go:noescape
func hasAsm() bool
// useAsm is a flag used to select the GO or ASM implementation of the popcnt function
var useAsm = hasAsm()
//go:noescape
func popcntSliceAsm(s []uint64) uint64
//go:noescape
func popcntMaskSliceAsm(s, m []uint64) uint64
//go:noescape
func popcntAndSliceAsm(s, m []uint64) uint64
//go:noescape
func popcntOrSliceAsm(s, m []uint64) uint64
//go:noescape
func popcntXorSliceAsm(s, m []uint64) uint64
func popcntSlice(s []uint64) uint64 {
if useAsm {
return popcntSliceAsm(s)
}
return popcntSliceGo(s)
}
func popcntMaskSlice(s, m []uint64) uint64 {
if useAsm {
return popcntMaskSliceAsm(s, m)
}
return popcntMaskSliceGo(s, m)
}
func popcntAndSlice(s, m []uint64) uint64 {
if useAsm {
return popcntAndSliceAsm(s, m)
}
return popcntAndSliceGo(s, m)
}
func popcntOrSlice(s, m []uint64) uint64 {
if useAsm {
return popcntOrSliceAsm(s, m)
}
return popcntOrSliceGo(s, m)
}
func popcntXorSlice(s, m []uint64) uint64 {
if useAsm {
return popcntXorSliceAsm(s, m)
}
return popcntXorSliceGo(s, m)
}

View File

@@ -0,0 +1,17 @@
// +build !go1.9
package roaring
// bit population count, taken from
// https://code.google.com/p/go/issues/detail?id=4988#c11
// credit: https://code.google.com/u/arnehormann/
// credit: https://play.golang.org/p/U7SogJ7psJ
// credit: http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
func popcount(x uint64) uint64 {
x -= (x >> 1) & 0x5555555555555555
x = (x>>2)&0x3333333333333333 + x&0x3333333333333333
x += x >> 4
x &= 0x0f0f0f0f0f0f0f0f
x *= 0x0101010101010101
return x >> 56
}

View File

@@ -0,0 +1,23 @@
// +build !amd64 appengine go1.9
package roaring
func popcntSlice(s []uint64) uint64 {
return popcntSliceGo(s)
}
func popcntMaskSlice(s, m []uint64) uint64 {
return popcntMaskSliceGo(s, m)
}
func popcntAndSlice(s, m []uint64) uint64 {
return popcntAndSliceGo(s, m)
}
func popcntOrSlice(s, m []uint64) uint64 {
return popcntOrSliceGo(s, m)
}
func popcntXorSlice(s, m []uint64) uint64 {
return popcntXorSliceGo(s, m)
}

View File

@@ -0,0 +1,41 @@
package roaring
func popcntSliceGo(s []uint64) uint64 {
cnt := uint64(0)
for _, x := range s {
cnt += popcount(x)
}
return cnt
}
func popcntMaskSliceGo(s, m []uint64) uint64 {
cnt := uint64(0)
for i := range s {
cnt += popcount(s[i] &^ m[i])
}
return cnt
}
func popcntAndSliceGo(s, m []uint64) uint64 {
cnt := uint64(0)
for i := range s {
cnt += popcount(s[i] & m[i])
}
return cnt
}
func popcntOrSliceGo(s, m []uint64) uint64 {
cnt := uint64(0)
for i := range s {
cnt += popcount(s[i] | m[i])
}
return cnt
}
func popcntXorSliceGo(s, m []uint64) uint64 {
cnt := uint64(0)
for i := range s {
cnt += popcount(s[i] ^ m[i])
}
return cnt
}

View File

@@ -0,0 +1,101 @@
package roaring
import "container/heap"
/////////////
// The priorityQueue is used to keep Bitmaps sorted.
////////////
type item struct {
value *Bitmap
index int
}
type priorityQueue []*item
func (pq priorityQueue) Len() int { return len(pq) }
func (pq priorityQueue) Less(i, j int) bool {
return pq[i].value.GetSizeInBytes() < pq[j].value.GetSizeInBytes()
}
func (pq priorityQueue) Swap(i, j int) {
pq[i], pq[j] = pq[j], pq[i]
pq[i].index = i
pq[j].index = j
}
func (pq *priorityQueue) Push(x interface{}) {
n := len(*pq)
item := x.(*item)
item.index = n
*pq = append(*pq, item)
}
func (pq *priorityQueue) Pop() interface{} {
old := *pq
n := len(old)
item := old[n-1]
item.index = -1 // for safety
*pq = old[0 : n-1]
return item
}
func (pq *priorityQueue) update(item *item, value *Bitmap) {
item.value = value
heap.Fix(pq, item.index)
}
/////////////
// The containerPriorityQueue is used to keep the containers of various Bitmaps sorted.
////////////
type containeritem struct {
value *Bitmap
keyindex int
index int
}
type containerPriorityQueue []*containeritem
func (pq containerPriorityQueue) Len() int { return len(pq) }
func (pq containerPriorityQueue) Less(i, j int) bool {
k1 := pq[i].value.highlowcontainer.getKeyAtIndex(pq[i].keyindex)
k2 := pq[j].value.highlowcontainer.getKeyAtIndex(pq[j].keyindex)
if k1 != k2 {
return k1 < k2
}
c1 := pq[i].value.highlowcontainer.getContainerAtIndex(pq[i].keyindex)
c2 := pq[j].value.highlowcontainer.getContainerAtIndex(pq[j].keyindex)
return c1.getCardinality() > c2.getCardinality()
}
func (pq containerPriorityQueue) Swap(i, j int) {
pq[i], pq[j] = pq[j], pq[i]
pq[i].index = i
pq[j].index = j
}
func (pq *containerPriorityQueue) Push(x interface{}) {
n := len(*pq)
item := x.(*containeritem)
item.index = n
*pq = append(*pq, item)
}
func (pq *containerPriorityQueue) Pop() interface{} {
old := *pq
n := len(old)
item := old[n-1]
item.index = -1 // for safety
*pq = old[0 : n-1]
return item
}
//func (pq *containerPriorityQueue) update(item *containeritem, value *Bitmap, keyindex int) {
// item.value = value
// item.keyindex = keyindex
// heap.Fix(pq, item.index)
//}

1578
vendor/github.com/RoaringBitmap/roaring/roaring.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

757
vendor/github.com/RoaringBitmap/roaring/roaringarray.go generated vendored Normal file
View File

@@ -0,0 +1,757 @@
package roaring
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"github.com/RoaringBitmap/roaring/internal"
)
type container interface {
addOffset(uint16) []container
clone() container
and(container) container
andCardinality(container) int
iand(container) container // i stands for inplace
andNot(container) container
iandNot(container) container // i stands for inplace
isEmpty() bool
getCardinality() int
// rank returns the number of integers that are
// smaller or equal to x. rank(infinity) would be getCardinality().
rank(uint16) int
iadd(x uint16) bool // inplace, returns true if x was new.
iaddReturnMinimized(uint16) container // may change return type to minimize storage.
//addRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
iaddRange(start, endx int) container // i stands for inplace, range is [firstOfRange,endx)
iremove(x uint16) bool // inplace, returns true if x was present.
iremoveReturnMinimized(uint16) container // may change return type to minimize storage.
not(start, final int) container // range is [firstOfRange,lastOfRange)
inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
xor(r container) container
getShortIterator() shortPeekable
iterate(cb func(x uint16) bool) bool
getReverseIterator() shortIterable
getManyIterator() manyIterable
contains(i uint16) bool
maximum() uint16
minimum() uint16
// equals is now logical equals; it does not require the
// same underlying container types, but compares across
// any of the implementations.
equals(r container) bool
fillLeastSignificant16bits(array []uint32, i int, mask uint32) int
or(r container) container
orCardinality(r container) int
isFull() bool
ior(r container) container // i stands for inplace
intersects(r container) bool // whether the two containers intersect
lazyOR(r container) container
lazyIOR(r container) container
getSizeInBytes() int
//removeRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
selectInt(x uint16) int // selectInt returns the xth integer in the container
serializedSizeInBytes() int
writeTo(io.Writer) (int, error)
numberOfRuns() int
toEfficientContainer() container
String() string
containerType() contype
}
type contype uint8
const (
bitmapContype contype = iota
arrayContype
run16Contype
run32Contype
)
// careful: range is [firstOfRange,lastOfRange]
func rangeOfOnes(start, last int) container {
if start > MaxUint16 {
panic("rangeOfOnes called with start > MaxUint16")
}
if last > MaxUint16 {
panic("rangeOfOnes called with last > MaxUint16")
}
if start < 0 {
panic("rangeOfOnes called with start < 0")
}
if last < 0 {
panic("rangeOfOnes called with last < 0")
}
return newRunContainer16Range(uint16(start), uint16(last))
}
type roaringArray struct {
keys []uint16
containers []container `msg:"-"` // don't try to serialize directly.
needCopyOnWrite []bool
copyOnWrite bool
}
func newRoaringArray() *roaringArray {
return &roaringArray{}
}
// runOptimize compresses the element containers to minimize space consumed.
// Q: how does this interact with copyOnWrite and needCopyOnWrite?
// A: since we aren't changing the logical content, just the representation,
// we don't bother to check the needCopyOnWrite bits. We replace
// (possibly all) elements of ra.containers in-place with space
// optimized versions.
func (ra *roaringArray) runOptimize() {
for i := range ra.containers {
ra.containers[i] = ra.containers[i].toEfficientContainer()
}
}
func (ra *roaringArray) appendContainer(key uint16, value container, mustCopyOnWrite bool) {
ra.keys = append(ra.keys, key)
ra.containers = append(ra.containers, value)
ra.needCopyOnWrite = append(ra.needCopyOnWrite, mustCopyOnWrite)
}
func (ra *roaringArray) appendWithoutCopy(sa roaringArray, startingindex int) {
mustCopyOnWrite := sa.needCopyOnWrite[startingindex]
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], mustCopyOnWrite)
}
func (ra *roaringArray) appendCopy(sa roaringArray, startingindex int) {
// cow only if the two request it, or if we already have a lightweight copy
copyonwrite := (ra.copyOnWrite && sa.copyOnWrite) || sa.needsCopyOnWrite(startingindex)
if !copyonwrite {
// since there is no copy-on-write, we need to clone the container (this is important)
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex].clone(), copyonwrite)
} else {
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], copyonwrite)
if !sa.needsCopyOnWrite(startingindex) {
sa.setNeedsCopyOnWrite(startingindex)
}
}
}
func (ra *roaringArray) appendWithoutCopyMany(sa roaringArray, startingindex, end int) {
for i := startingindex; i < end; i++ {
ra.appendWithoutCopy(sa, i)
}
}
func (ra *roaringArray) appendCopyMany(sa roaringArray, startingindex, end int) {
for i := startingindex; i < end; i++ {
ra.appendCopy(sa, i)
}
}
func (ra *roaringArray) appendCopiesUntil(sa roaringArray, stoppingKey uint16) {
// cow only if the two request it, or if we already have a lightweight copy
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
for i := 0; i < sa.size(); i++ {
if sa.keys[i] >= stoppingKey {
break
}
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
if thiscopyonewrite {
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
if !sa.needsCopyOnWrite(i) {
sa.setNeedsCopyOnWrite(i)
}
} else {
// since there is no copy-on-write, we need to clone the container (this is important)
ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
}
}
}
func (ra *roaringArray) appendCopiesAfter(sa roaringArray, beforeStart uint16) {
// cow only if the two request it, or if we already have a lightweight copy
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
startLocation := sa.getIndex(beforeStart)
if startLocation >= 0 {
startLocation++
} else {
startLocation = -startLocation - 1
}
for i := startLocation; i < sa.size(); i++ {
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
if thiscopyonewrite {
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
if !sa.needsCopyOnWrite(i) {
sa.setNeedsCopyOnWrite(i)
}
} else {
// since there is no copy-on-write, we need to clone the container (this is important)
ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
}
}
}
func (ra *roaringArray) removeIndexRange(begin, end int) {
if end <= begin {
return
}
r := end - begin
copy(ra.keys[begin:], ra.keys[end:])
copy(ra.containers[begin:], ra.containers[end:])
copy(ra.needCopyOnWrite[begin:], ra.needCopyOnWrite[end:])
ra.resize(len(ra.keys) - r)
}
func (ra *roaringArray) resize(newsize int) {
for k := newsize; k < len(ra.containers); k++ {
ra.containers[k] = nil
}
ra.keys = ra.keys[:newsize]
ra.containers = ra.containers[:newsize]
ra.needCopyOnWrite = ra.needCopyOnWrite[:newsize]
}
func (ra *roaringArray) clear() {
ra.resize(0)
ra.copyOnWrite = false
}
func (ra *roaringArray) clone() *roaringArray {
sa := roaringArray{}
sa.copyOnWrite = ra.copyOnWrite
// this is where copyOnWrite is used.
if ra.copyOnWrite {
sa.keys = make([]uint16, len(ra.keys))
copy(sa.keys, ra.keys)
sa.containers = make([]container, len(ra.containers))
copy(sa.containers, ra.containers)
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
ra.markAllAsNeedingCopyOnWrite()
sa.markAllAsNeedingCopyOnWrite()
// sa.needCopyOnWrite is shared
} else {
// make a full copy
sa.keys = make([]uint16, len(ra.keys))
copy(sa.keys, ra.keys)
sa.containers = make([]container, len(ra.containers))
for i := range sa.containers {
sa.containers[i] = ra.containers[i].clone()
}
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
}
return &sa
}
// clone all containers which have needCopyOnWrite set to true
// This can be used to make sure it is safe to munmap a []byte
// that the roaring array may still have a reference to.
func (ra *roaringArray) cloneCopyOnWriteContainers() {
for i, needCopyOnWrite := range ra.needCopyOnWrite {
if needCopyOnWrite {
ra.containers[i] = ra.containers[i].clone()
ra.needCopyOnWrite[i] = false
}
}
}
// unused function:
//func (ra *roaringArray) containsKey(x uint16) bool {
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
//}
func (ra *roaringArray) getContainer(x uint16) container {
i := ra.binarySearch(0, int64(len(ra.keys)), x)
if i < 0 {
return nil
}
return ra.containers[i]
}
func (ra *roaringArray) getContainerAtIndex(i int) container {
return ra.containers[i]
}
func (ra *roaringArray) getFastContainerAtIndex(i int, needsWriteable bool) container {
c := ra.getContainerAtIndex(i)
switch t := c.(type) {
case *arrayContainer:
c = t.toBitmapContainer()
case *runContainer16:
if !t.isFull() {
c = t.toBitmapContainer()
}
case *bitmapContainer:
if needsWriteable && ra.needCopyOnWrite[i] {
c = ra.containers[i].clone()
}
}
return c
}
// getUnionedWritableContainer switches behavior for in-place Or
// depending on whether the container requires a copy on write.
// If it does using the non-inplace or() method leads to fewer allocations.
func (ra *roaringArray) getUnionedWritableContainer(pos int, other container) container {
if ra.needCopyOnWrite[pos] {
return ra.getContainerAtIndex(pos).or(other)
}
return ra.getContainerAtIndex(pos).ior(other)
}
func (ra *roaringArray) getWritableContainerAtIndex(i int) container {
if ra.needCopyOnWrite[i] {
ra.containers[i] = ra.containers[i].clone()
ra.needCopyOnWrite[i] = false
}
return ra.containers[i]
}
func (ra *roaringArray) getIndex(x uint16) int {
// before the binary search, we optimize for frequent cases
size := len(ra.keys)
if (size == 0) || (ra.keys[size-1] == x) {
return size - 1
}
return ra.binarySearch(0, int64(size), x)
}
func (ra *roaringArray) getKeyAtIndex(i int) uint16 {
return ra.keys[i]
}
func (ra *roaringArray) insertNewKeyValueAt(i int, key uint16, value container) {
ra.keys = append(ra.keys, 0)
ra.containers = append(ra.containers, nil)
copy(ra.keys[i+1:], ra.keys[i:])
copy(ra.containers[i+1:], ra.containers[i:])
ra.keys[i] = key
ra.containers[i] = value
ra.needCopyOnWrite = append(ra.needCopyOnWrite, false)
copy(ra.needCopyOnWrite[i+1:], ra.needCopyOnWrite[i:])
ra.needCopyOnWrite[i] = false
}
func (ra *roaringArray) remove(key uint16) bool {
i := ra.binarySearch(0, int64(len(ra.keys)), key)
if i >= 0 { // key found
ra.removeAtIndex(i)
return true
}
return false
}
func (ra *roaringArray) removeAtIndex(i int) {
copy(ra.keys[i:], ra.keys[i+1:])
copy(ra.containers[i:], ra.containers[i+1:])
copy(ra.needCopyOnWrite[i:], ra.needCopyOnWrite[i+1:])
ra.resize(len(ra.keys) - 1)
}
func (ra *roaringArray) setContainerAtIndex(i int, c container) {
ra.containers[i] = c
}
func (ra *roaringArray) replaceKeyAndContainerAtIndex(i int, key uint16, c container, mustCopyOnWrite bool) {
ra.keys[i] = key
ra.containers[i] = c
ra.needCopyOnWrite[i] = mustCopyOnWrite
}
func (ra *roaringArray) size() int {
return len(ra.keys)
}
func (ra *roaringArray) binarySearch(begin, end int64, ikey uint16) int {
low := begin
high := end - 1
for low+16 <= high {
middleIndex := low + (high-low)/2 // avoid overflow
middleValue := ra.keys[middleIndex]
if middleValue < ikey {
low = middleIndex + 1
} else if middleValue > ikey {
high = middleIndex - 1
} else {
return int(middleIndex)
}
}
for ; low <= high; low++ {
val := ra.keys[low]
if val >= ikey {
if val == ikey {
return int(low)
}
break
}
}
return -int(low + 1)
}
func (ra *roaringArray) equals(o interface{}) bool {
srb, ok := o.(roaringArray)
if ok {
if srb.size() != ra.size() {
return false
}
for i, k := range ra.keys {
if k != srb.keys[i] {
return false
}
}
for i, c := range ra.containers {
if !c.equals(srb.containers[i]) {
return false
}
}
return true
}
return false
}
func (ra *roaringArray) headerSize() uint64 {
size := uint64(len(ra.keys))
if ra.hasRunCompression() {
if size < noOffsetThreshold { // for small bitmaps, we omit the offsets
return 4 + (size+7)/8 + 4*size
}
return 4 + (size+7)/8 + 8*size // - 4 because we pack the size with the cookie
}
return 4 + 4 + 8*size
}
// should be dirt cheap
func (ra *roaringArray) serializedSizeInBytes() uint64 {
answer := ra.headerSize()
for _, c := range ra.containers {
answer += uint64(c.serializedSizeInBytes())
}
return answer
}
//
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
//
func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
hasRun := ra.hasRunCompression()
isRunSizeInBytes := 0
cookieSize := 8
if hasRun {
cookieSize = 4
isRunSizeInBytes = (len(ra.keys) + 7) / 8
}
descriptiveHeaderSize := 4 * len(ra.keys)
preambleSize := cookieSize + isRunSizeInBytes + descriptiveHeaderSize
buf := make([]byte, preambleSize+4*len(ra.keys))
nw := 0
if hasRun {
binary.LittleEndian.PutUint16(buf[0:], uint16(serialCookie))
nw += 2
binary.LittleEndian.PutUint16(buf[2:], uint16(len(ra.keys)-1))
nw += 2
// compute isRun bitmap without temporary allocation
var runbitmapslice = buf[nw : nw+isRunSizeInBytes]
for i, c := range ra.containers {
switch c.(type) {
case *runContainer16:
runbitmapslice[i/8] |= 1 << (uint(i) % 8)
}
}
nw += isRunSizeInBytes
} else {
binary.LittleEndian.PutUint32(buf[0:], uint32(serialCookieNoRunContainer))
nw += 4
binary.LittleEndian.PutUint32(buf[4:], uint32(len(ra.keys)))
nw += 4
}
// descriptive header
for i, key := range ra.keys {
binary.LittleEndian.PutUint16(buf[nw:], key)
nw += 2
c := ra.containers[i]
binary.LittleEndian.PutUint16(buf[nw:], uint16(c.getCardinality()-1))
nw += 2
}
startOffset := int64(preambleSize + 4*len(ra.keys))
if !hasRun || (len(ra.keys) >= noOffsetThreshold) {
// offset header
for _, c := range ra.containers {
binary.LittleEndian.PutUint32(buf[nw:], uint32(startOffset))
nw += 4
switch rc := c.(type) {
case *runContainer16:
startOffset += 2 + int64(len(rc.iv))*4
default:
startOffset += int64(getSizeInBytesFromCardinality(c.getCardinality()))
}
}
}
written, err := w.Write(buf[:nw])
if err != nil {
return n, err
}
n += int64(written)
for _, c := range ra.containers {
written, err := c.writeTo(w)
if err != nil {
return n, err
}
n += int64(written)
}
return n, nil
}
//
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
//
func (ra *roaringArray) toBytes() ([]byte, error) {
var buf bytes.Buffer
_, err := ra.writeTo(&buf)
return buf.Bytes(), err
}
func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte) (int64, error) {
var cookie uint32
var err error
if len(cookieHeader) > 0 && len(cookieHeader) != 4 {
return int64(len(cookieHeader)), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: incorrect size of cookie header")
}
if len(cookieHeader) == 4 {
cookie = binary.LittleEndian.Uint32(cookieHeader)
} else {
cookie, err = stream.ReadUInt32()
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
}
}
var size uint32
var isRunBitmap []byte
if cookie&0x0000FFFF == serialCookie {
size = uint32(cookie>>16 + 1)
// create is-run-container bitmap
isRunBitmapSize := (int(size) + 7) / 8
isRunBitmap, err = stream.Next(isRunBitmapSize)
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err)
}
} else if cookie == serialCookieNoRunContainer {
size, err = stream.ReadUInt32()
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err)
}
} else {
return stream.GetReadBytes(), fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
}
if size > (1 << 16) {
return stream.GetReadBytes(), fmt.Errorf("it is logically impossible to have more than (1<<16) containers")
}
// descriptive header
buf, err := stream.Next(2 * 2 * int(size))
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err)
}
keycard := byteSliceAsUint16Slice(buf)
if isRunBitmap == nil || size >= noOffsetThreshold {
if err := stream.SkipBytes(int(size) * 4); err != nil {
return stream.GetReadBytes(), fmt.Errorf("failed to skip bytes: %s", err)
}
}
// Allocate slices upfront as number of containers is known
if cap(ra.containers) >= int(size) {
ra.containers = ra.containers[:size]
} else {
ra.containers = make([]container, size)
}
if cap(ra.keys) >= int(size) {
ra.keys = ra.keys[:size]
} else {
ra.keys = make([]uint16, size)
}
if cap(ra.needCopyOnWrite) >= int(size) {
ra.needCopyOnWrite = ra.needCopyOnWrite[:size]
} else {
ra.needCopyOnWrite = make([]bool, size)
}
for i := uint32(0); i < size; i++ {
key := keycard[2*i]
card := int(keycard[2*i+1]) + 1
ra.keys[i] = key
ra.needCopyOnWrite[i] = true
if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
// run container
nr, err := stream.ReadUInt16()
if err != nil {
return 0, fmt.Errorf("failed to read runtime container size: %s", err)
}
buf, err := stream.Next(int(nr) * 4)
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err)
}
nb := runContainer16{
iv: byteSliceAsInterval16Slice(buf),
}
ra.containers[i] = &nb
} else if card > arrayDefaultMaxSize {
// bitmap container
buf, err := stream.Next(arrayDefaultMaxSize * 2)
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err)
}
nb := bitmapContainer{
cardinality: card,
bitmap: byteSliceAsUint64Slice(buf),
}
ra.containers[i] = &nb
} else {
// array container
buf, err := stream.Next(card * 2)
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("failed to read array container: %s", err)
}
nb := arrayContainer{
byteSliceAsUint16Slice(buf),
}
ra.containers[i] = &nb
}
}
return stream.GetReadBytes(), nil
}
func (ra *roaringArray) hasRunCompression() bool {
for _, c := range ra.containers {
switch c.(type) {
case *runContainer16:
return true
}
}
return false
}
func (ra *roaringArray) advanceUntil(min uint16, pos int) int {
lower := pos + 1
if lower >= len(ra.keys) || ra.keys[lower] >= min {
return lower
}
spansize := 1
for lower+spansize < len(ra.keys) && ra.keys[lower+spansize] < min {
spansize *= 2
}
var upper int
if lower+spansize < len(ra.keys) {
upper = lower + spansize
} else {
upper = len(ra.keys) - 1
}
if ra.keys[upper] == min {
return upper
}
if ra.keys[upper] < min {
// means the array has no item >= min; pos = array.length
return len(ra.keys)
}
// we know that the next-smallest span was too small
lower += (spansize >> 1)
mid := 0
for lower+1 != upper {
mid = (lower + upper) >> 1
if ra.keys[mid] == min {
return mid
} else if ra.keys[mid] < min {
lower = mid
} else {
upper = mid
}
}
return upper
}
func (ra *roaringArray) markAllAsNeedingCopyOnWrite() {
for i := range ra.needCopyOnWrite {
ra.needCopyOnWrite[i] = true
}
}
func (ra *roaringArray) needsCopyOnWrite(i int) bool {
return ra.needCopyOnWrite[i]
}
func (ra *roaringArray) setNeedsCopyOnWrite(i int) {
ra.needCopyOnWrite[i] = true
}

2604
vendor/github.com/RoaringBitmap/roaring/runcontainer.go generated vendored Normal file

File diff suppressed because it is too large

View File

@@ -0,0 +1,19 @@
package roaring
import (
"encoding/binary"
"io"
)
// writeTo for runContainer16 follows this
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
//
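// As a worked layout example (illustrative only, derived from the code
// below): a single interval {start: 5, length: 2} serializes to the six
// little-endian bytes
//
//	01 00  05 00  02 00
//
// that is, the 16-bit run count (1) followed by the start and length of each run.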
func (b *runContainer16) writeTo(stream io.Writer) (int, error) {
buf := make([]byte, 2+4*len(b.iv))
binary.LittleEndian.PutUint16(buf[0:], uint16(len(b.iv)))
for i, v := range b.iv {
binary.LittleEndian.PutUint16(buf[2+i*4:], v.start)
binary.LittleEndian.PutUint16(buf[2+2+i*4:], v.length)
}
return stream.Write(buf)
}

View File

@@ -0,0 +1,133 @@
// +build !amd64,!386,!arm,!arm64,!ppc64le,!mipsle,!mips64le,!mips64p32le,!wasm appengine
package roaring
import (
"encoding/binary"
"errors"
"io"
)
func (b *arrayContainer) writeTo(stream io.Writer) (int, error) {
buf := make([]byte, 2*len(b.content))
for i, v := range b.content {
base := i * 2
buf[base] = byte(v)
buf[base+1] = byte(v >> 8)
}
return stream.Write(buf)
}
func (b *arrayContainer) readFrom(stream io.Reader) (int, error) {
err := binary.Read(stream, binary.LittleEndian, b.content)
if err != nil {
return 0, err
}
return 2 * len(b.content), nil
}
func (b *bitmapContainer) writeTo(stream io.Writer) (int, error) {
if b.cardinality <= arrayDefaultMaxSize {
return 0, errors.New("refusing to write bitmap container with cardinality of array container")
}
// Write set
buf := make([]byte, 8*len(b.bitmap))
for i, v := range b.bitmap {
base := i * 8
buf[base] = byte(v)
buf[base+1] = byte(v >> 8)
buf[base+2] = byte(v >> 16)
buf[base+3] = byte(v >> 24)
buf[base+4] = byte(v >> 32)
buf[base+5] = byte(v >> 40)
buf[base+6] = byte(v >> 48)
buf[base+7] = byte(v >> 56)
}
return stream.Write(buf)
}
func (b *bitmapContainer) readFrom(stream io.Reader) (int, error) {
err := binary.Read(stream, binary.LittleEndian, b.bitmap)
if err != nil {
return 0, err
}
b.computeCardinality()
return 8 * len(b.bitmap), nil
}
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
by := make([]byte, len(bc.bitmap)*8)
for i := range bc.bitmap {
binary.LittleEndian.PutUint64(by[i*8:], bc.bitmap[i])
}
return by
}
func uint64SliceAsByteSlice(slice []uint64) []byte {
by := make([]byte, len(slice)*8)
for i, v := range slice {
binary.LittleEndian.PutUint64(by[i*8:], v)
}
return by
}
func uint16SliceAsByteSlice(slice []uint16) []byte {
by := make([]byte, len(slice)*2)
for i, v := range slice {
binary.LittleEndian.PutUint16(by[i*2:], v)
}
return by
}
func byteSliceAsUint16Slice(slice []byte) []uint16 {
if len(slice)%2 != 0 {
panic("Slice size should be divisible by 2")
}
b := make([]uint16, len(slice)/2)
for i := range b {
b[i] = binary.LittleEndian.Uint16(slice[2*i:])
}
return b
}
func byteSliceAsUint64Slice(slice []byte) []uint64 {
if len(slice)%8 != 0 {
panic("Slice size should be divisible by 8")
}
b := make([]uint64, len(slice)/8)
for i := range b {
b[i] = binary.LittleEndian.Uint64(slice[8*i:])
}
return b
}
// Converts a byte slice to an interval16 slice.
// The function assumes that the byte buffer is run container data
// encoded according to the Roaring Format Spec.
func byteSliceAsInterval16Slice(byteSlice []byte) []interval16 {
if len(byteSlice)%4 != 0 {
panic("Slice size should be divisible by 4")
}
intervalSlice := make([]interval16, len(byteSlice)/4)
for i := range intervalSlice {
intervalSlice[i] = interval16{
start: binary.LittleEndian.Uint16(byteSlice[i*4:]),
length: binary.LittleEndian.Uint16(byteSlice[i*4+2:]),
}
}
return intervalSlice
}

View File

@@ -0,0 +1,417 @@
// +build 386,!appengine amd64,!appengine arm,!appengine arm64,!appengine ppc64le,!appengine mipsle,!appengine mips64le,!appengine mips64p32le,!appengine wasm,!appengine
package roaring
import (
"encoding/binary"
"errors"
"io"
"reflect"
"runtime"
"unsafe"
)
func (ac *arrayContainer) writeTo(stream io.Writer) (int, error) {
buf := uint16SliceAsByteSlice(ac.content)
return stream.Write(buf)
}
func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) {
if bc.cardinality <= arrayDefaultMaxSize {
return 0, errors.New("refusing to write bitmap container with cardinality of array container")
}
buf := uint64SliceAsByteSlice(bc.bitmap)
return stream.Write(buf)
}
func uint64SliceAsByteSlice(slice []uint64) []byte {
// make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
// update its capacity and length
header.Len *= 8
header.Cap *= 8
// instantiate result and use KeepAlive so data isn't unmapped.
result := *(*[]byte)(unsafe.Pointer(&header))
runtime.KeepAlive(&slice)
// return it
return result
}
func uint16SliceAsByteSlice(slice []uint16) []byte {
// make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
// update its capacity and length
header.Len *= 2
header.Cap *= 2
// instantiate result and use KeepAlive so data isn't unmapped.
result := *(*[]byte)(unsafe.Pointer(&header))
runtime.KeepAlive(&slice)
// return it
return result
}
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
return uint64SliceAsByteSlice(bc.bitmap)
}
// Deserialization code follows
////
// These methods (byteSliceAsUint16Slice,...) do not make copies,
// they are pointer-based (unsafe). The caller is responsible for
// ensuring that the input slice does not get garbage collected, deleted
// or modified while holding the returned slice.
////
func byteSliceAsUint16Slice(slice []byte) (result []uint16) { // here we create a new slice holder
if len(slice)%2 != 0 {
panic("Slice size should be divisible by 2")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / 2
rHeader.Cap = bHeader.Cap / 2
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
}
func byteSliceAsUint64Slice(slice []byte) (result []uint64) {
if len(slice)%8 != 0 {
panic("Slice size should be divisible by 8")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / 8
rHeader.Cap = bHeader.Cap / 8
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
}
func byteSliceAsInterval16Slice(slice []byte) (result []interval16) {
if len(slice)%4 != 0 {
panic("Slice size should be divisible by 4")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / 4
rHeader.Cap = bHeader.Cap / 4
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
}
// FrozenView creates a bitmap from its serialized version stored in buf.
// It uses CRoaring's frozen bitmap format.
//
// The format specification is available here:
// https://github.com/RoaringBitmap/CRoaring/blob/2c867e9f9c9e2a3a7032791f94c4c7ae3013f6e0/src/roaring.c#L2756-L2783
//
// The provided byte array (buf) is expected to be a constant.
// The function makes a best-effort attempt not to copy data.
// Only little endian is supported; the function returns an error if it
// detects a big-endian serialized file.
// You should take care not to modify buf, as doing so will likely result in
// unexpected program behavior.
// If said buffer comes from a memory map, it's advisable to give it read
// only permissions, either at creation or by calling Mprotect from the
// golang.org/x/sys/unix package.
//
// Resulting bitmaps are effectively immutable in the following sense:
// a copy-on-write marker is used so that when you modify the resulting
// bitmap, copies of selected data (containers) are made.
// You should *not* change the copy-on-write status of the resulting
// bitmaps (SetCopyOnWrite).
//
// If buf becomes unavailable, then a bitmap created with
// FrozenView would be effectively broken. Furthermore, any
// bitmap derived from this bitmap (e.g., via Or, And) might
// also be broken. Thus, before making buf unavailable, you should
// call CloneCopyOnWriteContainers on all such bitmaps.
//
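// A minimal usage sketch (an illustration, not part of the API surface; it
// assumes buf already holds frozen-format bytes, e.g. produced by Freeze or
// read from a read-only memory map):
//
//	view := roaring.NewBitmap()
//	if err := view.FrozenView(buf); err != nil {
//		// handle malformed or truncated input
//	}
//	_ = view.GetCardinality() // read-only style access; avoid SetCopyOnWrite
//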
func (rb *Bitmap) FrozenView(buf []byte) error {
return rb.highlowcontainer.frozenView(buf)
}
/* Verbatim specification from CRoaring.
*
* FROZEN SERIALIZATION FORMAT DESCRIPTION
*
* -- (beginning must be aligned by 32 bytes) --
* <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * num_bitset_containers]
* <run_data> rle16_t[total number of rle elements in all run containers]
* <array_data> uint16_t[total number of array elements in all array containers]
* <keys> uint16_t[num_containers]
* <counts> uint16_t[num_containers]
* <typecodes> uint8_t[num_containers]
* <header> uint32_t
*
* <header> is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits)
* and the number of containers (17 bits).
*
* <counts> stores number of elements for every container.
* Its meaning depends on container type.
* For array and bitset containers, this value is the container cardinality minus one.
* For run container, it is the number of rle_t elements (n_runs).
*
* <bitset_data>,<array_data>,<run_data> are flat arrays of elements of
* all containers of respective type.
*
* <*_data> and <keys> are kept close together because they are not accessed
* during deserialization. This may reduce IO in case of large mmapped bitmaps.
* All members have their native alignments during deserialization except <header>,
* which is not guaranteed to be aligned by 4 bytes.
*/
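// Illustrative decoding of the 4-byte trailer described above (a sketch that
// mirrors frozenView below; buf is assumed to hold a complete frozen image):
//
//	header := binary.LittleEndian.Uint32(buf[len(buf)-4:])
//	cookie := header & 0x7fff  // low 15 bits: must equal FROZEN_COOKIE
//	nCont := int(header >> 15) // remaining 17 bits: number of containers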
const FROZEN_COOKIE = 13766
var (
FrozenBitmapInvalidCookie = errors.New("header does not contain the FROZEN_COOKIE")
FrozenBitmapBigEndian = errors.New("loading big endian frozen bitmaps is not supported")
FrozenBitmapIncomplete = errors.New("input buffer too small to contain a frozen bitmap")
FrozenBitmapOverpopulated = errors.New("too many containers")
FrozenBitmapUnexpectedData = errors.New("spurious data in input")
FrozenBitmapInvalidTypecode = errors.New("unrecognized typecode")
FrozenBitmapBufferTooSmall = errors.New("buffer too small")
)
func (ra *roaringArray) frozenView(buf []byte) error {
if len(buf) < 4 {
return FrozenBitmapIncomplete
}
headerBE := binary.BigEndian.Uint32(buf[len(buf)-4:])
if headerBE & 0x7fff == FROZEN_COOKIE {
return FrozenBitmapBigEndian
}
header := binary.LittleEndian.Uint32(buf[len(buf)-4:])
buf = buf[:len(buf)-4]
if header & 0x7fff != FROZEN_COOKIE {
return FrozenBitmapInvalidCookie
}
nCont := int(header >> 15)
if nCont > (1 << 16) {
return FrozenBitmapOverpopulated
}
// 1 byte per type, 2 bytes per key, 2 bytes per count.
if len(buf) < 5*nCont {
return FrozenBitmapIncomplete
}
types := buf[len(buf)-nCont:]
buf = buf[:len(buf)-nCont]
counts := byteSliceAsUint16Slice(buf[len(buf)-2*nCont:])
buf = buf[:len(buf)-2*nCont]
keys := byteSliceAsUint16Slice(buf[len(buf)-2*nCont:])
buf = buf[:len(buf)-2*nCont]
nBitmap, nArray, nRun := uint64(0), uint64(0), uint64(0)
nArrayEl, nRunEl := uint64(0), uint64(0)
for i, t := range types {
switch (t) {
case 1:
nBitmap++
case 2:
nArray++
nArrayEl += uint64(counts[i])+1
case 3:
nRun++
nRunEl += uint64(counts[i])
default:
return FrozenBitmapInvalidTypecode
}
}
if uint64(len(buf)) < (1 << 13)*nBitmap + 4*nRunEl + 2*nArrayEl {
return FrozenBitmapIncomplete
}
bitsetsArena := byteSliceAsUint64Slice(buf[:(1 << 13)*nBitmap])
buf = buf[(1 << 13)*nBitmap:]
runsArena := byteSliceAsInterval16Slice(buf[:4*nRunEl])
buf = buf[4*nRunEl:]
arraysArena := byteSliceAsUint16Slice(buf[:2*nArrayEl])
buf = buf[2*nArrayEl:]
if len(buf) != 0 {
return FrozenBitmapUnexpectedData
}
// TODO: maybe arena_alloc all this.
containers := make([]container, nCont)
bitsets := make([]bitmapContainer, nBitmap)
arrays := make([]arrayContainer, nArray)
runs := make([]runContainer16, nRun)
needCOW := make([]bool, nCont)
iBitset, iArray, iRun := uint64(0), uint64(0), uint64(0)
for i, t := range types {
needCOW[i] = true
switch (t) {
case 1:
containers[i] = &bitsets[iBitset]
bitsets[iBitset].cardinality = int(counts[i])+1
bitsets[iBitset].bitmap = bitsetsArena[:1024]
bitsetsArena = bitsetsArena[1024:]
iBitset++
case 2:
containers[i] = &arrays[iArray]
sz := int(counts[i])+1
arrays[iArray].content = arraysArena[:sz]
arraysArena = arraysArena[sz:]
iArray++
case 3:
containers[i] = &runs[iRun]
runs[iRun].iv = runsArena[:counts[i]]
runsArena = runsArena[counts[i]:]
iRun++
}
}
// Not consuming the full input is a bug.
if iBitset != nBitmap || len(bitsetsArena) != 0 ||
iArray != nArray || len(arraysArena) != 0 ||
iRun != nRun || len(runsArena) != 0 {
panic("we missed something")
}
ra.keys = keys
ra.containers = containers
ra.needCopyOnWrite = needCOW
ra.copyOnWrite = true
return nil
}
func (bm *Bitmap) GetFrozenSizeInBytes() uint64 {
nBits, nArrayEl, nRunEl := uint64(0), uint64(0), uint64(0)
for _, c := range bm.highlowcontainer.containers {
switch v := c.(type) {
case *bitmapContainer:
nBits++
case *arrayContainer:
nArrayEl += uint64(len(v.content))
case *runContainer16:
nRunEl += uint64(len(v.iv))
}
}
return 4 + 5*uint64(len(bm.highlowcontainer.containers)) +
(nBits << 13) + 2*nArrayEl + 4*nRunEl
}
func (bm *Bitmap) Freeze() ([]byte, error) {
sz := bm.GetFrozenSizeInBytes()
buf := make([]byte, sz)
_, err := bm.FreezeTo(buf)
return buf, err
}
func (bm *Bitmap) FreezeTo(buf []byte) (int, error) {
containers := bm.highlowcontainer.containers
nCont := len(containers)
nBits, nArrayEl, nRunEl := 0, 0, 0
for _, c := range containers {
switch v := c.(type) {
case *bitmapContainer:
nBits++
case *arrayContainer:
nArrayEl += len(v.content)
case *runContainer16:
nRunEl += len(v.iv)
}
}
serialSize := 4 + 5*nCont + (1 << 13)*nBits + 4*nRunEl + 2*nArrayEl
if len(buf) < serialSize {
return 0, FrozenBitmapBufferTooSmall
}
bitsArena := byteSliceAsUint64Slice(buf[:(1 << 13)*nBits])
buf = buf[(1 << 13)*nBits:]
runsArena := byteSliceAsInterval16Slice(buf[:4*nRunEl])
buf = buf[4*nRunEl:]
arraysArena := byteSliceAsUint16Slice(buf[:2*nArrayEl])
buf = buf[2*nArrayEl:]
keys := byteSliceAsUint16Slice(buf[:2*nCont])
buf = buf[2*nCont:]
counts := byteSliceAsUint16Slice(buf[:2*nCont])
buf = buf[2*nCont:]
types := buf[:nCont]
buf = buf[nCont:]
header := uint32(FROZEN_COOKIE|(nCont << 15))
binary.LittleEndian.PutUint32(buf[:4], header)
copy(keys, bm.highlowcontainer.keys[:])
for i, c := range containers {
switch v := c.(type) {
case *bitmapContainer:
copy(bitsArena, v.bitmap)
bitsArena = bitsArena[1024:]
counts[i] = uint16(v.cardinality-1)
types[i] = 1
case *arrayContainer:
copy(arraysArena, v.content)
arraysArena = arraysArena[len(v.content):]
elems := len(v.content)
counts[i] = uint16(elems-1)
types[i] = 2
case *runContainer16:
copy(runsArena, v.iv)
runs := len(v.iv)
runsArena = runsArena[runs:]
counts[i] = uint16(runs)
types[i] = 3
}
}
return serialSize, nil
}

View File

@@ -0,0 +1,21 @@
// +build gofuzz
package roaring
import "bytes"
func FuzzSerializationStream(data []byte) int {
newrb := NewBitmap()
if _, err := newrb.ReadFrom(bytes.NewReader(data)); err != nil {
return 0
}
return 1
}
func FuzzSerializationBuffer(data []byte) int {
newrb := NewBitmap()
if _, err := newrb.FromBuffer(data); err != nil {
return 0
}
return 1
}

550
vendor/github.com/RoaringBitmap/roaring/setutil.go generated vendored Normal file
View File

@@ -0,0 +1,550 @@
package roaring
func equal(a, b []uint16) bool {
if len(a) != len(b) {
return false
}
for i := range a {
if a[i] != b[i] {
return false
}
}
return true
}
func difference(set1 []uint16, set2 []uint16, buffer []uint16) int {
if 0 == len(set2) {
buffer = buffer[:len(set1)]
for k := 0; k < len(set1); k++ {
buffer[k] = set1[k]
}
return len(set1)
}
if 0 == len(set1) {
return 0
}
pos := 0
k1 := 0
k2 := 0
buffer = buffer[:cap(buffer)]
s1 := set1[k1]
s2 := set2[k2]
for {
if s1 < s2 {
buffer[pos] = s1
pos++
k1++
if k1 >= len(set1) {
break
}
s1 = set1[k1]
} else if s1 == s2 {
k1++
k2++
if k1 >= len(set1) {
break
}
s1 = set1[k1]
if k2 >= len(set2) {
for ; k1 < len(set1); k1++ {
buffer[pos] = set1[k1]
pos++
}
break
}
s2 = set2[k2]
} else { // if (val1>val2)
k2++
if k2 >= len(set2) {
for ; k1 < len(set1); k1++ {
buffer[pos] = set1[k1]
pos++
}
break
}
s2 = set2[k2]
}
}
return pos
}
func exclusiveUnion2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
if 0 == len(set2) {
buffer = buffer[:len(set1)]
copy(buffer, set1[:])
return len(set1)
}
if 0 == len(set1) {
buffer = buffer[:len(set2)]
copy(buffer, set2[:])
return len(set2)
}
pos := 0
k1 := 0
k2 := 0
s1 := set1[k1]
s2 := set2[k2]
buffer = buffer[:cap(buffer)]
for {
if s1 < s2 {
buffer[pos] = s1
pos++
k1++
if k1 >= len(set1) {
for ; k2 < len(set2); k2++ {
buffer[pos] = set2[k2]
pos++
}
break
}
s1 = set1[k1]
} else if s1 == s2 {
k1++
k2++
if k1 >= len(set1) {
for ; k2 < len(set2); k2++ {
buffer[pos] = set2[k2]
pos++
}
break
}
if k2 >= len(set2) {
for ; k1 < len(set1); k1++ {
buffer[pos] = set1[k1]
pos++
}
break
}
s1 = set1[k1]
s2 = set2[k2]
} else { // if (val1>val2)
buffer[pos] = s2
pos++
k2++
if k2 >= len(set2) {
for ; k1 < len(set1); k1++ {
buffer[pos] = set1[k1]
pos++
}
break
}
s2 = set2[k2]
}
}
return pos
}
func union2by2Cardinality(set1 []uint16, set2 []uint16) int {
pos := 0
k1 := 0
k2 := 0
if 0 == len(set2) {
return len(set1)
}
if 0 == len(set1) {
return len(set2)
}
s1 := set1[k1]
s2 := set2[k2]
for {
if s1 < s2 {
pos++
k1++
if k1 >= len(set1) {
pos += len(set2) - k2
break
}
s1 = set1[k1]
} else if s1 == s2 {
pos++
k1++
k2++
if k1 >= len(set1) {
pos += len(set2) - k2
break
}
if k2 >= len(set2) {
pos += len(set1) - k1
break
}
s1 = set1[k1]
s2 = set2[k2]
} else { // if (set1[k1]>set2[k2])
pos++
k2++
if k2 >= len(set2) {
pos += len(set1) - k1
break
}
s2 = set2[k2]
}
}
return pos
}
func intersection2by2(
set1 []uint16,
set2 []uint16,
buffer []uint16) int {
if len(set1)*64 < len(set2) {
return onesidedgallopingintersect2by2(set1, set2, buffer)
} else if len(set2)*64 < len(set1) {
return onesidedgallopingintersect2by2(set2, set1, buffer)
} else {
return localintersect2by2(set1, set2, buffer)
}
}
func intersection2by2Cardinality(
set1 []uint16,
set2 []uint16) int {
if len(set1)*64 < len(set2) {
return onesidedgallopingintersect2by2Cardinality(set1, set2)
} else if len(set2)*64 < len(set1) {
return onesidedgallopingintersect2by2Cardinality(set2, set1)
} else {
return localintersect2by2Cardinality(set1, set2)
}
}
func intersects2by2(
set1 []uint16,
set2 []uint16) bool {
// could be optimized if one set is much larger than the other one
if (0 == len(set1)) || (0 == len(set2)) {
return false
}
k1 := 0
k2 := 0
s1 := set1[k1]
s2 := set2[k2]
mainwhile:
for {
if s2 < s1 {
for {
k2++
if k2 == len(set2) {
break mainwhile
}
s2 = set2[k2]
if s2 >= s1 {
break
}
}
}
if s1 < s2 {
for {
k1++
if k1 == len(set1) {
break mainwhile
}
s1 = set1[k1]
if s1 >= s2 {
break
}
}
} else {
// (set2[k2] == set1[k1])
return true
}
}
return false
}
func localintersect2by2(
set1 []uint16,
set2 []uint16,
buffer []uint16) int {
if (0 == len(set1)) || (0 == len(set2)) {
return 0
}
k1 := 0
k2 := 0
pos := 0
buffer = buffer[:cap(buffer)]
s1 := set1[k1]
s2 := set2[k2]
mainwhile:
for {
if s2 < s1 {
for {
k2++
if k2 == len(set2) {
break mainwhile
}
s2 = set2[k2]
if s2 >= s1 {
break
}
}
}
if s1 < s2 {
for {
k1++
if k1 == len(set1) {
break mainwhile
}
s1 = set1[k1]
if s1 >= s2 {
break
}
}
} else {
// (set2[k2] == set1[k1])
buffer[pos] = s1
pos++
k1++
if k1 == len(set1) {
break
}
s1 = set1[k1]
k2++
if k2 == len(set2) {
break
}
s2 = set2[k2]
}
}
return pos
}
func localintersect2by2Cardinality(
set1 []uint16,
set2 []uint16) int {
if (0 == len(set1)) || (0 == len(set2)) {
return 0
}
k1 := 0
k2 := 0
pos := 0
s1 := set1[k1]
s2 := set2[k2]
mainwhile:
for {
if s2 < s1 {
for {
k2++
if k2 == len(set2) {
break mainwhile
}
s2 = set2[k2]
if s2 >= s1 {
break
}
}
}
if s1 < s2 {
for {
k1++
if k1 == len(set1) {
break mainwhile
}
s1 = set1[k1]
if s1 >= s2 {
break
}
}
} else {
// (set2[k2] == set1[k1])
pos++
k1++
if k1 == len(set1) {
break
}
s1 = set1[k1]
k2++
if k2 == len(set2) {
break
}
s2 = set2[k2]
}
}
return pos
}
func advanceUntil(
array []uint16,
pos int,
length int,
min uint16) int {
lower := pos + 1
if lower >= length || array[lower] >= min {
return lower
}
spansize := 1
for lower+spansize < length && array[lower+spansize] < min {
spansize *= 2
}
var upper int
if lower+spansize < length {
upper = lower + spansize
} else {
upper = length - 1
}
if array[upper] == min {
return upper
}
if array[upper] < min {
// means the array has no item >= min; pos = array.length
return length
}
// we know that the next-smallest span was too small
lower += (spansize >> 1)
mid := 0
for lower+1 != upper {
mid = (lower + upper) >> 1
if array[mid] == min {
return mid
} else if array[mid] < min {
lower = mid
} else {
upper = mid
}
}
return upper
}
func onesidedgallopingintersect2by2(
smallset []uint16,
largeset []uint16,
buffer []uint16) int {
if 0 == len(smallset) {
return 0
}
buffer = buffer[:cap(buffer)]
k1 := 0
k2 := 0
pos := 0
s1 := largeset[k1]
s2 := smallset[k2]
mainwhile:
for {
if s1 < s2 {
k1 = advanceUntil(largeset, k1, len(largeset), s2)
if k1 == len(largeset) {
break mainwhile
}
s1 = largeset[k1]
}
if s2 < s1 {
k2++
if k2 == len(smallset) {
break mainwhile
}
s2 = smallset[k2]
} else {
buffer[pos] = s2
pos++
k2++
if k2 == len(smallset) {
break
}
s2 = smallset[k2]
k1 = advanceUntil(largeset, k1, len(largeset), s2)
if k1 == len(largeset) {
break mainwhile
}
s1 = largeset[k1]
}
}
return pos
}
func onesidedgallopingintersect2by2Cardinality(
smallset []uint16,
largeset []uint16) int {
if 0 == len(smallset) {
return 0
}
k1 := 0
k2 := 0
pos := 0
s1 := largeset[k1]
s2 := smallset[k2]
mainwhile:
for {
if s1 < s2 {
k1 = advanceUntil(largeset, k1, len(largeset), s2)
if k1 == len(largeset) {
break mainwhile
}
s1 = largeset[k1]
}
if s2 < s1 {
k2++
if k2 == len(smallset) {
break mainwhile
}
s2 = smallset[k2]
} else {
pos++
k2++
if k2 == len(smallset) {
break
}
s2 = smallset[k2]
k1 = advanceUntil(largeset, k1, len(largeset), s2)
if k1 == len(largeset) {
break mainwhile
}
s1 = largeset[k1]
}
}
return pos
}
func binarySearch(array []uint16, ikey uint16) int {
low := 0
high := len(array) - 1
for low+16 <= high {
middleIndex := int(uint32(low+high) >> 1)
middleValue := array[middleIndex]
if middleValue < ikey {
low = middleIndex + 1
} else if middleValue > ikey {
high = middleIndex - 1
} else {
return middleIndex
}
}
for ; low <= high; low++ {
val := array[low]
if val >= ikey {
if val == ikey {
return low
}
break
}
}
return -(low + 1)
}

View File

@@ -0,0 +1,6 @@
// +build arm64,!gccgo,!appengine
package roaring
//go:noescape
func union2by2(set1 []uint16, set2 []uint16, buffer []uint16) (size int)

132
vendor/github.com/RoaringBitmap/roaring/setutil_arm64.s generated vendored Normal file
View File

@@ -0,0 +1,132 @@
// +build arm64,!gccgo,!appengine
#include "textflag.h"
// This implements union2by2 using golang's version of arm64 assembly
// The algorithm is very similar to the generic one,
// but makes better use of arm64 features so is notably faster.
// The basic algorithm structure is as follows:
// 1. If either set is empty, copy the other set into the buffer and return the length
// 2. Otherwise, load the first element of each set into a variable (s1 and s2).
// 3. a. Compare the values of s1 and s2.
// b. Add the smaller one to the buffer.
// c. Perform a bounds check before incrementing.
// If one set is finished, copy the rest of the other set over.
// d. Update s1 and/or s2 to the next value, then continue the loop.
//
// Beyond the algorithm itself, this code makes use of several arm64 features:
// Condition Codes:
// arm64's CMP operation sets 4 bits that can be used for branching,
// rather than just true or false.
// As a consequence, a single comparison gives enough information to distinguish the three cases.
//
// Post-increment pointers after load/store:
// Instructions like `MOVHU.P 2(R0), R6`
// increment the register by a specified amount, in this example 2.
// Because uint16's are exactly 2 bytes and the length of the slices
// is part of the slice header,
// there is no need to separately track the index into the slice.
// Instead, the code can calculate the final read value and compare against that,
// using the post-increment reads to move the pointers along.
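//
// For example, the paired post-increment load and store used throughout the
// body below (a worked illustration of this addressing mode):
//
//	MOVHU.P 2(R0), R6  // load the uint16 at R0 into R6, then advance R0 by 2
//	MOVHU.P R6, 2(R2)  // store R6 at the address in R2, then advance R2 by 2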
//
// TODO: CALL out to memmove once the list is exhausted.
// Right now it moves the necessary shorts so that the remaining count
// is a multiple of 4 and then copies 64 bits at a time.
TEXT ·union2by2(SB), NOSPLIT, $0-80
// R0, R1, and R2 for the pointers to the three slices
MOVD set1+0(FP), R0
MOVD set2+24(FP), R1
MOVD buffer+48(FP), R2
//R3 and R4 will be the values at which we will have finished reading set1 and set2.
// R3 should be R0 + 2 * set1_len+8(FP)
MOVD set1_len+8(FP), R3
MOVD set2_len+32(FP), R4
ADD R3<<1, R0, R3
ADD R4<<1, R1, R4
// Rather than counting the number of elements added separately,
// save the starting address of buffer (in R5).
MOVD buffer+48(FP), R5
// set1 is empty, just flush set2
CMP R0, R3
BEQ flush_right
// set2 is empty, just flush set1
CMP R1, R4
BEQ flush_left
// R6, R7 are the working space for s1 and s2
MOVD ZR, R6
MOVD ZR, R7
MOVHU.P 2(R0), R6
MOVHU.P 2(R1), R7
loop:
CMP R6, R7
BEQ pop_both // R6 == R7
BLS pop_right // R6 > R7
//pop_left: // R6 < R7
MOVHU.P R6, 2(R2)
CMP R0, R3
BEQ pop_then_flush_right
MOVHU.P 2(R0), R6
JMP loop
pop_both:
MOVHU.P R6, 2(R2) //could also use R7, since they are equal
CMP R0, R3
BEQ flush_right
CMP R1, R4
BEQ flush_left
MOVHU.P 2(R0), R6
MOVHU.P 2(R1), R7
JMP loop
pop_right:
MOVHU.P R7, 2(R2)
CMP R1, R4
BEQ pop_then_flush_left
MOVHU.P 2(R1), R7
JMP loop
pop_then_flush_right:
MOVHU.P R7, 2(R2)
flush_right:
MOVD R1, R0
MOVD R4, R3
JMP flush_left
pop_then_flush_left:
MOVHU.P R6, 2(R2)
flush_left:
CMP R0, R3
BEQ return
//figure out how many bytes to slough off. Must be a multiple of two
SUB R0, R3, R4
ANDS $6, R4
BEQ long_flush //handles the 0 mod 8 case
SUBS $4, R4, R4 // since possible values are 2, 4, 6, this splits evenly
BLT pop_single // exactly the 2 case
MOVW.P 4(R0), R6
MOVW.P R6, 4(R2)
BEQ long_flush // we're now aligned by 64 bits, as R4==4, otherwise 2 more
pop_single:
MOVHU.P 2(R0), R6
MOVHU.P R6, 2(R2)
long_flush:
// at this point we know R3 - R0 is a multiple of 8.
CMP R0, R3
BEQ return
MOVD.P 8(R0), R6
MOVD.P R6, 8(R2)
JMP long_flush
return:
// number of shorts written is (R5 - R2) >> 1
SUB R5, R2
LSR $1, R2, R2
MOVD R2, size+72(FP)
RET

View File

@@ -0,0 +1,63 @@
// +build !arm64 gccgo appengine
package roaring
func union2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
pos := 0
k1 := 0
k2 := 0
if 0 == len(set2) {
buffer = buffer[:len(set1)]
copy(buffer, set1[:])
return len(set1)
}
if 0 == len(set1) {
buffer = buffer[:len(set2)]
copy(buffer, set2[:])
return len(set2)
}
s1 := set1[k1]
s2 := set2[k2]
buffer = buffer[:cap(buffer)]
for {
if s1 < s2 {
buffer[pos] = s1
pos++
k1++
if k1 >= len(set1) {
copy(buffer[pos:], set2[k2:])
pos += len(set2) - k2
break
}
s1 = set1[k1]
} else if s1 == s2 {
buffer[pos] = s1
pos++
k1++
k2++
if k1 >= len(set1) {
copy(buffer[pos:], set2[k2:])
pos += len(set2) - k2
break
}
if k2 >= len(set2) {
copy(buffer[pos:], set1[k1:])
pos += len(set1) - k1
break
}
s1 = set1[k1]
s2 = set2[k2]
} else { // if (set1[k1]>set2[k2])
buffer[pos] = s2
pos++
k2++
if k2 >= len(set2) {
copy(buffer[pos:], set1[k1:])
pos += len(set1) - k1
break
}
s2 = set2[k2]
}
}
return pos
}

View File

@@ -0,0 +1,52 @@
package roaring
type shortIterable interface {
hasNext() bool
next() uint16
}
type shortPeekable interface {
shortIterable
peekNext() uint16
advanceIfNeeded(minval uint16)
}
type shortIterator struct {
slice []uint16
loc int
}
func (si *shortIterator) hasNext() bool {
return si.loc < len(si.slice)
}
func (si *shortIterator) next() uint16 {
a := si.slice[si.loc]
si.loc++
return a
}
func (si *shortIterator) peekNext() uint16 {
return si.slice[si.loc]
}
func (si *shortIterator) advanceIfNeeded(minval uint16) {
if si.hasNext() && si.peekNext() < minval {
si.loc = advanceUntil(si.slice, si.loc, len(si.slice), minval)
}
}
type reverseIterator struct {
slice []uint16
loc int
}
func (si *reverseIterator) hasNext() bool {
return si.loc >= 0
}
func (si *reverseIterator) next() uint16 {
a := si.slice[si.loc]
si.loc--
return a
}

383
vendor/github.com/RoaringBitmap/roaring/smat.go generated vendored Normal file
View File

@@ -0,0 +1,383 @@
// +build gofuzz
/*
# Instructions for smat testing for roaring
[smat](https://github.com/mschoch/smat) is a framework that provides
state machine assisted fuzz testing.
To run the smat tests for roaring...
## Prerequisites
$ go get github.com/dvyukov/go-fuzz/go-fuzz
$ go get github.com/dvyukov/go-fuzz/go-fuzz-build
## Steps
1. Generate initial smat corpus:
```
go test -tags=gofuzz -run=TestGenerateSmatCorpus
```
2. Build go-fuzz test program with instrumentation:
```
go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring
```
3. Run go-fuzz:
```
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
```
You should see output like...
```
2016/09/16 13:58:35 slaves: 8, corpus: 1 (3s ago), crashers: 0, restarts: 1/0, execs: 0 (0/sec), cover: 0, uptime: 3s
2016/09/16 13:58:38 slaves: 8, corpus: 1 (6s ago), crashers: 0, restarts: 1/0, execs: 0 (0/sec), cover: 0, uptime: 6s
2016/09/16 13:58:41 slaves: 8, corpus: 1 (9s ago), crashers: 0, restarts: 1/44, execs: 44 (5/sec), cover: 0, uptime: 9s
2016/09/16 13:58:44 slaves: 8, corpus: 1 (12s ago), crashers: 0, restarts: 1/45, execs: 45 (4/sec), cover: 0, uptime: 12s
2016/09/16 13:58:47 slaves: 8, corpus: 1 (15s ago), crashers: 0, restarts: 1/46, execs: 46 (3/sec), cover: 0, uptime: 15s
2016/09/16 13:58:50 slaves: 8, corpus: 1 (18s ago), crashers: 0, restarts: 1/47, execs: 47 (3/sec), cover: 0, uptime: 18s
2016/09/16 13:58:53 slaves: 8, corpus: 1 (21s ago), crashers: 0, restarts: 1/63, execs: 63 (3/sec), cover: 0, uptime: 21s
2016/09/16 13:58:56 slaves: 8, corpus: 1 (24s ago), crashers: 0, restarts: 1/65, execs: 65 (3/sec), cover: 0, uptime: 24s
2016/09/16 13:58:59 slaves: 8, corpus: 1 (27s ago), crashers: 0, restarts: 1/66, execs: 66 (2/sec), cover: 0, uptime: 27s
2016/09/16 13:59:02 slaves: 8, corpus: 1 (30s ago), crashers: 0, restarts: 1/67, execs: 67 (2/sec), cover: 0, uptime: 30s
2016/09/16 13:59:05 slaves: 8, corpus: 1 (33s ago), crashers: 0, restarts: 1/83, execs: 83 (3/sec), cover: 0, uptime: 33s
2016/09/16 13:59:08 slaves: 8, corpus: 1 (36s ago), crashers: 0, restarts: 1/84, execs: 84 (2/sec), cover: 0, uptime: 36s
2016/09/16 13:59:11 slaves: 8, corpus: 2 (0s ago), crashers: 0, restarts: 1/85, execs: 85 (2/sec), cover: 0, uptime: 39s
2016/09/16 13:59:14 slaves: 8, corpus: 17 (2s ago), crashers: 0, restarts: 1/86, execs: 86 (2/sec), cover: 480, uptime: 42s
2016/09/16 13:59:17 slaves: 8, corpus: 17 (5s ago), crashers: 0, restarts: 1/66, execs: 132 (3/sec), cover: 487, uptime: 45s
2016/09/16 13:59:20 slaves: 8, corpus: 17 (8s ago), crashers: 0, restarts: 1/440, execs: 2645 (55/sec), cover: 487, uptime: 48s
```
Let it run, and if the # of crashers is > 0, check out the reports in
the workdir where you should be able to find the panic goroutine stack
traces.
*/
package roaring
import (
"fmt"
"sort"
"github.com/mschoch/smat"
"github.com/bits-and-blooms/bitset"
)
// fuzz test using state machine driven by byte stream.
func FuzzSmat(data []byte) int {
return smat.Fuzz(&smatContext{}, smat.ActionID('S'), smat.ActionID('T'),
smatActionMap, data)
}
var smatDebug = false
func smatLog(prefix, format string, args ...interface{}) {
if smatDebug {
fmt.Print(prefix)
fmt.Printf(format, args...)
}
}
type smatContext struct {
pairs []*smatPair
// Two registers, x & y.
x int
y int
actions int
}
type smatPair struct {
bm *Bitmap
bs *bitset.BitSet
}
// ------------------------------------------------------------------
var smatActionMap = smat.ActionMap{
smat.ActionID('X'): smatAction("x++", smatWrap(func(c *smatContext) { c.x++ })),
smat.ActionID('x'): smatAction("x--", smatWrap(func(c *smatContext) { c.x-- })),
smat.ActionID('Y'): smatAction("y++", smatWrap(func(c *smatContext) { c.y++ })),
smat.ActionID('y'): smatAction("y--", smatWrap(func(c *smatContext) { c.y-- })),
smat.ActionID('*'): smatAction("x*y", smatWrap(func(c *smatContext) { c.x = c.x * c.y })),
smat.ActionID('<'): smatAction("x<<", smatWrap(func(c *smatContext) { c.x = c.x << 1 })),
smat.ActionID('^'): smatAction("swap", smatWrap(func(c *smatContext) { c.x, c.y = c.y, c.x })),
smat.ActionID('['): smatAction(" pushPair", smatWrap(smatPushPair)),
smat.ActionID(']'): smatAction(" popPair", smatWrap(smatPopPair)),
smat.ActionID('B'): smatAction(" setBit", smatWrap(smatSetBit)),
smat.ActionID('b'): smatAction(" removeBit", smatWrap(smatRemoveBit)),
smat.ActionID('o'): smatAction(" or", smatWrap(smatOr)),
smat.ActionID('a'): smatAction(" and", smatWrap(smatAnd)),
smat.ActionID('#'): smatAction(" cardinality", smatWrap(smatCardinality)),
smat.ActionID('O'): smatAction(" orCardinality", smatWrap(smatOrCardinality)),
smat.ActionID('A'): smatAction(" andCardinality", smatWrap(smatAndCardinality)),
smat.ActionID('c'): smatAction(" clear", smatWrap(smatClear)),
smat.ActionID('r'): smatAction(" runOptimize", smatWrap(smatRunOptimize)),
smat.ActionID('e'): smatAction(" isEmpty", smatWrap(smatIsEmpty)),
smat.ActionID('i'): smatAction(" intersects", smatWrap(smatIntersects)),
smat.ActionID('f'): smatAction(" flip", smatWrap(smatFlip)),
smat.ActionID('-'): smatAction(" difference", smatWrap(smatDifference)),
}
var smatRunningPercentActions []smat.PercentAction
func init() {
var ids []int
for actionId := range smatActionMap {
ids = append(ids, int(actionId))
}
sort.Ints(ids)
pct := 100 / len(smatActionMap)
for _, actionId := range ids {
smatRunningPercentActions = append(smatRunningPercentActions,
smat.PercentAction{pct, smat.ActionID(actionId)})
}
smatActionMap[smat.ActionID('S')] = smatAction("SETUP", smatSetupFunc)
smatActionMap[smat.ActionID('T')] = smatAction("TEARDOWN", smatTeardownFunc)
}
// We only have one smat state: running.
func smatRunning(next byte) smat.ActionID {
return smat.PercentExecute(next, smatRunningPercentActions...)
}
func smatAction(name string, f func(ctx smat.Context) (smat.State, error)) func(smat.Context) (smat.State, error) {
return func(ctx smat.Context) (smat.State, error) {
c := ctx.(*smatContext)
c.actions++
smatLog(" ", "%s\n", name)
return f(ctx)
}
}
// Creates an smat action func based on a simple callback.
func smatWrap(cb func(c *smatContext)) func(smat.Context) (next smat.State, err error) {
return func(ctx smat.Context) (next smat.State, err error) {
c := ctx.(*smatContext)
cb(c)
return smatRunning, nil
}
}
// Invokes a callback function with the input v bounded to len(c.pairs).
func (c *smatContext) withPair(v int, cb func(*smatPair)) {
if len(c.pairs) > 0 {
if v < 0 {
v = -v
}
v = v % len(c.pairs)
cb(c.pairs[v])
}
}
// ------------------------------------------------------------------
func smatSetupFunc(ctx smat.Context) (next smat.State, err error) {
return smatRunning, nil
}
func smatTeardownFunc(ctx smat.Context) (next smat.State, err error) {
return nil, err
}
// ------------------------------------------------------------------
func smatPushPair(c *smatContext) {
c.pairs = append(c.pairs, &smatPair{
bm: NewBitmap(),
bs: bitset.New(100),
})
}
func smatPopPair(c *smatContext) {
if len(c.pairs) > 0 {
c.pairs = c.pairs[0 : len(c.pairs)-1]
}
}
func smatSetBit(c *smatContext) {
c.withPair(c.x, func(p *smatPair) {
y := uint32(c.y)
p.bm.AddInt(int(y))
p.bs.Set(uint(y))
p.checkEquals()
})
}
func smatRemoveBit(c *smatContext) {
c.withPair(c.x, func(p *smatPair) {
y := uint32(c.y)
p.bm.Remove(y)
p.bs.Clear(uint(y))
p.checkEquals()
})
}
func smatAnd(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
c.withPair(c.y, func(py *smatPair) {
px.bm.And(py.bm)
px.bs = px.bs.Intersection(py.bs)
px.checkEquals()
py.checkEquals()
})
})
}
func smatOr(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
c.withPair(c.y, func(py *smatPair) {
px.bm.Or(py.bm)
px.bs = px.bs.Union(py.bs)
px.checkEquals()
py.checkEquals()
})
})
}
func smatAndCardinality(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
c.withPair(c.y, func(py *smatPair) {
c0 := px.bm.AndCardinality(py.bm)
c1 := px.bs.IntersectionCardinality(py.bs)
if c0 != uint64(c1) {
panic("expected same add cardinality")
}
px.checkEquals()
py.checkEquals()
})
})
}
func smatOrCardinality(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
c.withPair(c.y, func(py *smatPair) {
c0 := px.bm.OrCardinality(py.bm)
c1 := px.bs.UnionCardinality(py.bs)
if c0 != uint64(c1) {
panic("expected same or cardinality")
}
px.checkEquals()
py.checkEquals()
})
})
}
func smatRunOptimize(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
px.bm.RunOptimize()
px.checkEquals()
})
}
func smatClear(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
px.bm.Clear()
px.bs = px.bs.ClearAll()
px.checkEquals()
})
}
func smatCardinality(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
c0 := px.bm.GetCardinality()
c1 := px.bs.Count()
if c0 != uint64(c1) {
panic("expected same cardinality")
}
})
}
func smatIsEmpty(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
c0 := px.bm.IsEmpty()
c1 := px.bs.None()
if c0 != c1 {
panic("expected same is empty")
}
})
}
func smatIntersects(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
c.withPair(c.y, func(py *smatPair) {
v0 := px.bm.Intersects(py.bm)
v1 := px.bs.IntersectionCardinality(py.bs) > 0
if v0 != v1 {
panic("intersects not equal")
}
px.checkEquals()
py.checkEquals()
})
})
}
func smatFlip(c *smatContext) {
c.withPair(c.x, func(p *smatPair) {
y := uint32(c.y)
p.bm.Flip(uint64(y), uint64(y)+1)
p.bs = p.bs.Flip(uint(y))
p.checkEquals()
})
}
func smatDifference(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
c.withPair(c.y, func(py *smatPair) {
px.bm.AndNot(py.bm)
px.bs = px.bs.Difference(py.bs)
px.checkEquals()
py.checkEquals()
})
})
}
func (p *smatPair) checkEquals() {
if !p.equalsBitSet(p.bs, p.bm) {
panic("bitset mismatch")
}
}
func (p *smatPair) equalsBitSet(a *bitset.BitSet, b *Bitmap) bool {
for i, e := a.NextSet(0); e; i, e = a.NextSet(i + 1) {
if !b.ContainsInt(int(i)) {
fmt.Printf("in a bitset, not b bitmap, i: %d\n", i)
fmt.Printf(" a bitset: %s\n b bitmap: %s\n",
a.String(), b.String())
return false
}
}
i := b.Iterator()
for i.HasNext() {
v := i.Next()
if !a.Test(uint(v)) {
fmt.Printf("in b bitmap, not a bitset, v: %d\n", v)
fmt.Printf(" a bitset: %s\n b bitmap: %s\n",
a.String(), b.String())
return false
}
}
return true
}

305
vendor/github.com/RoaringBitmap/roaring/util.go generated vendored Normal file
View File

@@ -0,0 +1,305 @@
package roaring
import (
"math"
"math/rand"
"sort"
)
const (
arrayDefaultMaxSize = 4096 // containers with 4096 or fewer integers should be array containers.
arrayLazyLowerBound = 1024
maxCapacity = 1 << 16
serialCookieNoRunContainer = 12346 // only arrays and bitmaps
invalidCardinality = -1
serialCookie = 12347 // runs, arrays, and bitmaps
noOffsetThreshold = 4
// MaxUint32 is the largest uint32 value.
MaxUint32 = math.MaxUint32
// MaxRange is one more than the maximum allowed bitmap bit index. For use as an upper
// bound for ranges.
MaxRange uint64 = MaxUint32 + 1
// MaxUint16 is the largest 16 bit unsigned int.
// This is the largest value an interval16 can store.
MaxUint16 = math.MaxUint16
// Compute wordSizeInBytes, the size of a word in bytes.
_m = ^uint64(0)
_logS = _m>>8&1 + _m>>16&1 + _m>>32&1
wordSizeInBytes = 1 << _logS
// other constants used in ctz_generic.go
wordSizeInBits = wordSizeInBytes << 3 // word size in bits
)
const maxWord = 1<<wordSizeInBits - 1
// doesn't apply to runContainers
func getSizeInBytesFromCardinality(card int) int {
if card > arrayDefaultMaxSize {
// bitmapContainer
return maxCapacity / 8
}
// arrayContainer
return 2 * card
}
func fill(arr []uint64, val uint64) {
for i := range arr {
arr[i] = val
}
}
func fillRange(arr []uint64, start, end int, val uint64) {
for i := start; i < end; i++ {
arr[i] = val
}
}
func fillArrayAND(container []uint16, bitmap1, bitmap2 []uint64) {
if len(bitmap1) != len(bitmap2) {
panic("array lengths don't match")
}
// TODO: rewrite in assembly
pos := 0
for k := range bitmap1 {
bitset := bitmap1[k] & bitmap2[k]
for bitset != 0 {
t := bitset & -bitset
container[pos] = uint16((k*64 + int(popcount(t-1))))
pos = pos + 1
bitset ^= t
}
}
}
func fillArrayANDNOT(container []uint16, bitmap1, bitmap2 []uint64) {
if len(bitmap1) != len(bitmap2) {
panic("array lengths don't match")
}
// TODO: rewrite in assembly
pos := 0
for k := range bitmap1 {
bitset := bitmap1[k] &^ bitmap2[k]
for bitset != 0 {
t := bitset & -bitset
container[pos] = uint16((k*64 + int(popcount(t-1))))
pos = pos + 1
bitset ^= t
}
}
}
func fillArrayXOR(container []uint16, bitmap1, bitmap2 []uint64) {
if len(bitmap1) != len(bitmap2) {
panic("array lengths don't match")
}
// TODO: rewrite in assembly
pos := 0
for k := 0; k < len(bitmap1); k++ {
bitset := bitmap1[k] ^ bitmap2[k]
for bitset != 0 {
t := bitset & -bitset
container[pos] = uint16((k*64 + int(popcount(t-1))))
pos = pos + 1
bitset ^= t
}
}
}
func highbits(x uint32) uint16 {
return uint16(x >> 16)
}
func lowbits(x uint32) uint16 {
return uint16(x & maxLowBit)
}
const maxLowBit = 0xFFFF
func flipBitmapRange(bitmap []uint64, start int, end int) {
if start >= end {
return
}
firstword := start / 64
endword := (end - 1) / 64
bitmap[firstword] ^= ^(^uint64(0) << uint(start%64))
for i := firstword; i < endword; i++ {
bitmap[i] = ^bitmap[i]
}
bitmap[endword] ^= ^uint64(0) >> (uint(-end) % 64)
}
func resetBitmapRange(bitmap []uint64, start int, end int) {
if start >= end {
return
}
firstword := start / 64
endword := (end - 1) / 64
if firstword == endword {
bitmap[firstword] &= ^((^uint64(0) << uint(start%64)) & (^uint64(0) >> (uint(-end) % 64)))
return
}
bitmap[firstword] &= ^(^uint64(0) << uint(start%64))
for i := firstword + 1; i < endword; i++ {
bitmap[i] = 0
}
bitmap[endword] &= ^(^uint64(0) >> (uint(-end) % 64))
}
func setBitmapRange(bitmap []uint64, start int, end int) {
if start >= end {
return
}
firstword := start / 64
endword := (end - 1) / 64
if firstword == endword {
bitmap[firstword] |= (^uint64(0) << uint(start%64)) & (^uint64(0) >> (uint(-end) % 64))
return
}
bitmap[firstword] |= ^uint64(0) << uint(start%64)
for i := firstword + 1; i < endword; i++ {
bitmap[i] = ^uint64(0)
}
bitmap[endword] |= ^uint64(0) >> (uint(-end) % 64)
}
func flipBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int {
before := wordCardinalityForBitmapRange(bitmap, start, end)
flipBitmapRange(bitmap, start, end)
after := wordCardinalityForBitmapRange(bitmap, start, end)
return int(after - before)
}
func resetBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int {
before := wordCardinalityForBitmapRange(bitmap, start, end)
resetBitmapRange(bitmap, start, end)
after := wordCardinalityForBitmapRange(bitmap, start, end)
return int(after - before)
}
func setBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int {
before := wordCardinalityForBitmapRange(bitmap, start, end)
setBitmapRange(bitmap, start, end)
after := wordCardinalityForBitmapRange(bitmap, start, end)
return int(after - before)
}
func wordCardinalityForBitmapRange(bitmap []uint64, start int, end int) uint64 {
answer := uint64(0)
if start >= end {
return answer
}
firstword := start / 64
endword := (end - 1) / 64
for i := firstword; i <= endword; i++ {
answer += popcount(bitmap[i])
}
return answer
}
func selectBitPosition(w uint64, j int) int {
seen := 0
// Divide 64bit
part := w & 0xFFFFFFFF
n := popcount(part)
if n <= uint64(j) {
part = w >> 32
seen += 32
j -= int(n)
}
w = part
// Divide 32bit
part = w & 0xFFFF
n = popcount(part)
if n <= uint64(j) {
part = w >> 16
seen += 16
j -= int(n)
}
w = part
// Divide 16bit
part = w & 0xFF
n = popcount(part)
if n <= uint64(j) {
part = w >> 8
seen += 8
j -= int(n)
}
w = part
// Lookup in final byte
var counter uint
for counter = 0; counter < 8; counter++ {
j -= int((w >> counter) & 1)
if j < 0 {
break
}
}
return seen + int(counter)
}
func panicOn(err error) {
if err != nil {
panic(err)
}
}
type ph struct {
orig int
rand int
}
type pha []ph
func (p pha) Len() int { return len(p) }
func (p pha) Less(i, j int) bool { return p[i].rand < p[j].rand }
func (p pha) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func getRandomPermutation(n int) []int {
r := make([]ph, n)
for i := 0; i < n; i++ {
r[i].orig = i
r[i].rand = rand.Intn(1 << 29)
}
sort.Sort(pha(r))
m := make([]int, n)
for i := range m {
m[i] = r[i].orig
}
return m
}
func minOfInt(a, b int) int {
if a < b {
return a
}
return b
}
func maxOfInt(a, b int) int {
if a > b {
return a
}
return b
}
func maxOfUint16(a, b uint16) uint16 {
if a > b {
return a
}
return b
}
func minOfUint16(a, b uint16) uint16 {
if a < b {
return a
}
return b
}