feat: Waku v2 bridge

Issue #12610
Michal Iskierko
2023-11-12 13:29:38 +01:00
parent 56e7bd01ca
commit 6d31343205
6716 changed files with 1982502 additions and 5891 deletions

20
vendor/github.com/RoaringBitmap/roaring/.drone.yml generated vendored Normal file

@@ -0,0 +1,20 @@
kind: pipeline
name: default
workspace:
base: /go
path: src/github.com/RoaringBitmap/roaring
steps:
- name: test
image: golang
commands:
- go get -t
- go test
- go test -race -run TestConcurrent*
- go build -tags appengine
- go test -tags appengine
- GOARCH=386 go build
- GOARCH=386 go test
- GOARCH=arm go build
- GOARCH=arm64 go build

5
vendor/github.com/RoaringBitmap/roaring/.gitignore generated vendored Normal file

@@ -0,0 +1,5 @@
*~
roaring-fuzz.zip
workdir
coverage.out
testdata/all3.classic

0
vendor/github.com/RoaringBitmap/roaring/.gitmodules generated vendored Normal file

32
vendor/github.com/RoaringBitmap/roaring/.travis.yml generated vendored Normal file

@@ -0,0 +1,32 @@
language: go
sudo: false
install:
- go get -t github.com/RoaringBitmap/roaring
- go get -t golang.org/x/tools/cmd/cover
- go get -t github.com/mattn/goveralls
- go get -t github.com/mschoch/smat
notifications:
email: false
go:
- "1.13.x"
- "1.14.x"
- tip
# whitelist
branches:
only:
- master
script:
- goveralls -v -service travis-ci -ignore rle16_gen.go,rle_gen.go,rle.go || go test
- go test -race -run TestConcurrent*
- go build -tags appengine
- go test -tags appengine
- GOARCH=arm64 go build
- GOARCH=386 go build
- GOARCH=386 go test
- GOARCH=arm go build
- GOARCH=arm64 go build
matrix:
allow_failures:
- go: tip

11
vendor/github.com/RoaringBitmap/roaring/AUTHORS generated vendored Normal file

@@ -0,0 +1,11 @@
# This is the official list of roaring authors for copyright purposes.
Todd Gruben (@tgruben),
Daniel Lemire (@lemire),
Elliot Murphy (@statik),
Bob Potter (@bpot),
Tyson Maly (@tvmaly),
Will Glynn (@willglynn),
Brent Pedersen (@brentp)
Maciej Biłas (@maciej),
Joe Nall (@joenall)


@@ -0,0 +1,847 @@
package roaring
import (
"fmt"
"github.com/RoaringBitmap/roaring"
"math/bits"
"runtime"
"sync"
"sync/atomic"
)
const (
// Min64BitSigned - Minimum 64 bit value
Min64BitSigned = -9223372036854775808
// Max64BitSigned - Maximum 64 bit value
Max64BitSigned = 9223372036854775807
)
// BSI, at its simplest, is an array of bitmaps that represent an encoded
// binary value. The advantage of a BSI is that comparisons can be made
// across ranges of values whereas a bitmap can only represent the existence
// of a single value for a given column ID. Another usage scenario involves
// storage of high cardinality values.
//
// It depends upon the bitmap libraries. It is not thread safe, so
// upstream concurrency guards must be provided.
type BSI struct {
bA []*roaring.Bitmap
eBM *roaring.Bitmap // Existence BitMap
MaxValue int64
MinValue int64
runOptimized bool
}
// NewBSI constructs a new BSI. Min/Max values are optional. If set to 0
// then the underlying BSI will be automatically sized.
func NewBSI(maxValue int64, minValue int64) *BSI {
bitsz := bits.Len64(uint64(minValue))
if bits.Len64(uint64(maxValue)) > bitsz {
bitsz = bits.Len64(uint64(maxValue))
}
ba := make([]*roaring.Bitmap, bitsz)
for i := 0; i < len(ba); i++ {
ba[i] = roaring.NewBitmap()
}
return &BSI{bA: ba, eBM: roaring.NewBitmap(), MaxValue: maxValue, MinValue: minValue}
}
// NewDefaultBSI constructs an auto-sized BSI
func NewDefaultBSI() *BSI {
return NewBSI(int64(0), int64(0))
}
// RunOptimize attempts to further compress the runs of consecutive values found in the bitmap
func (b *BSI) RunOptimize() {
b.eBM.RunOptimize()
for i := 0; i < len(b.bA); i++ {
b.bA[i].RunOptimize()
}
b.runOptimized = true
}
// HasRunCompression returns true if the bitmap benefits from run compression
func (b *BSI) HasRunCompression() bool {
return b.runOptimized
}
// GetExistenceBitmap returns a pointer to the underlying existence bitmap of the BSI
func (b *BSI) GetExistenceBitmap() *roaring.Bitmap {
return b.eBM
}
// ValueExists tests whether the value exists.
func (b *BSI) ValueExists(columnID uint64) bool {
return b.eBM.Contains(uint32(columnID))
}
// GetCardinality returns a count of unique column IDs for which a value has been set.
func (b *BSI) GetCardinality() uint64 {
return b.eBM.GetCardinality()
}
// BitCount returns the number of bits needed to represent values.
func (b *BSI) BitCount() int {
return len(b.bA)
}
// SetValue sets a value for a given columnID.
func (b *BSI) SetValue(columnID uint64, value int64) {
// If max/min values are set to zero then automatically determine bit array size
if b.MaxValue == 0 && b.MinValue == 0 {
ba := make([]*roaring.Bitmap, bits.Len64(uint64(value)))
for i := len(ba) - b.BitCount(); i > 0; i-- {
b.bA = append(b.bA, roaring.NewBitmap())
if b.runOptimized {
b.bA[i].RunOptimize()
}
}
}
var wg sync.WaitGroup
for i := 0; i < b.BitCount(); i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
if uint64(value)&(1<<uint64(j)) > 0 {
b.bA[j].Add(uint32(columnID))
} else {
b.bA[j].Remove(uint32(columnID))
}
}(i)
}
wg.Wait()
b.eBM.Add(uint32(columnID))
}
// GetValue gets the value at the column ID. Second param will be false for non-existent values.
func (b *BSI) GetValue(columnID uint64) (int64, bool) {
value := int64(0)
exists := b.eBM.Contains(uint32(columnID))
if !exists {
return value, exists
}
for i := 0; i < b.BitCount(); i++ {
if b.bA[i].Contains(uint32(columnID)) {
value |= (1 << uint64(i))
}
}
return int64(value), exists
}
type action func(t *task, batch []uint32, resultsChan chan *roaring.Bitmap, wg *sync.WaitGroup)
func parallelExecutor(parallelism int, t *task, e action,
foundSet *roaring.Bitmap) *roaring.Bitmap {
var n int = parallelism
if n == 0 {
n = runtime.NumCPU()
}
resultsChan := make(chan *roaring.Bitmap, n)
card := foundSet.GetCardinality()
x := card / uint64(n)
remainder := card - (x * uint64(n))
var batch []uint32
var wg sync.WaitGroup
iter := foundSet.ManyIterator()
for i := 0; i < n; i++ {
if i == n-1 {
batch = make([]uint32, x+remainder)
} else {
batch = make([]uint32, x)
}
iter.NextMany(batch)
wg.Add(1)
go e(t, batch, resultsChan, &wg)
}
wg.Wait()
close(resultsChan)
ba := make([]*roaring.Bitmap, 0)
for bm := range resultsChan {
ba = append(ba, bm)
}
return roaring.ParOr(0, ba...)
}
type bsiAction func(input *BSI, batch []uint32, resultsChan chan *BSI, wg *sync.WaitGroup)
func parallelExecutorBSIResults(parallelism int, input *BSI, e bsiAction, foundSet *roaring.Bitmap, sumResults bool) *BSI {
var n int = parallelism
if n == 0 {
n = runtime.NumCPU()
}
resultsChan := make(chan *BSI, n)
card := foundSet.GetCardinality()
x := card / uint64(n)
remainder := card - (x * uint64(n))
var batch []uint32
var wg sync.WaitGroup
iter := foundSet.ManyIterator()
for i := 0; i < n; i++ {
if i == n-1 {
batch = make([]uint32, x+remainder)
} else {
batch = make([]uint32, x)
}
iter.NextMany(batch)
wg.Add(1)
go e(input, batch, resultsChan, &wg)
}
wg.Wait()
close(resultsChan)
ba := make([]*BSI, 0)
for bm := range resultsChan {
ba = append(ba, bm)
}
results := NewDefaultBSI()
if sumResults {
for _, v := range ba {
results.Add(v)
}
} else {
results.ParOr(0, ba...)
}
return results
}
// Operation identifier
type Operation int
const (
// LT less than
LT Operation = 1 + iota
// LE less than or equal
LE
// EQ equal
EQ
// GE greater than or equal
GE
// GT greater than
GT
// RANGE range
RANGE
// MIN find minimum
MIN
// MAX find maximum
MAX
)
type task struct {
bsi *BSI
op Operation
valueOrStart int64
end int64
values map[int64]struct{}
bits *roaring.Bitmap
}
// CompareValue compares value.
// For all operations with the exception of RANGE, the value to be compared is specified by valueOrStart.
// For the RANGE operation, the comparison criterion is >= valueOrStart and <= end.
// The parallelism parameter indicates the number of CPU threads to be applied for processing. A value
// of zero indicates that all available CPU resources will be potentially utilized.
//
func (b *BSI) CompareValue(parallelism int, op Operation, valueOrStart, end int64,
foundSet *roaring.Bitmap) *roaring.Bitmap {
comp := &task{bsi: b, op: op, valueOrStart: valueOrStart, end: end}
if foundSet == nil {
return parallelExecutor(parallelism, comp, compareValue, b.eBM)
}
return parallelExecutor(parallelism, comp, compareValue, foundSet)
}
func compareValue(e *task, batch []uint32, resultsChan chan *roaring.Bitmap, wg *sync.WaitGroup) {
defer wg.Done()
results := roaring.NewBitmap()
if e.bsi.runOptimized {
results.RunOptimize()
}
x := e.bsi.BitCount()
startIsNegative := x == 64 && uint64(e.valueOrStart)&(1<<uint64(x-1)) > 0
endIsNegative := x == 64 && uint64(e.end)&(1<<uint64(x-1)) > 0
for i := 0; i < len(batch); i++ {
cID := batch[i]
eq1, eq2 := true, true
lt1, lt2, gt1 := false, false, false
j := e.bsi.BitCount() - 1
isNegative := false
if x == 64 {
isNegative = e.bsi.bA[j].Contains(cID)
j--
}
compStartValue := e.valueOrStart
compEndValue := e.end
if isNegative != startIsNegative {
compStartValue = ^e.valueOrStart + 1
}
if isNegative != endIsNegative {
compEndValue = ^e.end + 1
}
for ; j >= 0; j-- {
sliceContainsBit := e.bsi.bA[j].Contains(cID)
if uint64(compStartValue)&(1<<uint64(j)) > 0 {
// BIT in value is SET
if !sliceContainsBit {
if eq1 {
if (e.op == GT || e.op == GE || e.op == RANGE) && startIsNegative && !isNegative {
gt1 = true
}
if e.op == LT || e.op == LE {
if !startIsNegative || (startIsNegative == isNegative) {
lt1 = true
}
}
eq1 = false
break
}
}
} else {
// BIT in value is CLEAR
if sliceContainsBit {
if eq1 {
if (e.op == LT || e.op == LE) && isNegative && !startIsNegative {
lt1 = true
}
if e.op == GT || e.op == GE || e.op == RANGE {
if startIsNegative || (startIsNegative == isNegative) {
gt1 = true
}
}
eq1 = false
if e.op != RANGE {
break
}
}
}
}
if e.op == RANGE && uint64(compEndValue)&(1<<uint64(j)) > 0 {
// BIT in value is SET
if !sliceContainsBit {
if eq2 {
if !endIsNegative || (endIsNegative == isNegative) {
lt2 = true
}
eq2 = false
if startIsNegative && !endIsNegative {
break
}
}
}
} else if e.op == RANGE {
// BIT in value is CLEAR
if sliceContainsBit {
if eq2 {
if isNegative && !endIsNegative {
lt2 = true
}
eq2 = false
break
}
}
}
}
switch e.op {
case LT:
if lt1 {
results.Add(cID)
}
case LE:
if lt1 || (eq1 && (!startIsNegative || (startIsNegative && isNegative))) {
results.Add(cID)
}
case EQ:
if eq1 {
results.Add(cID)
}
case GE:
if gt1 || (eq1 && (startIsNegative || (!startIsNegative && !isNegative))) {
results.Add(cID)
}
case GT:
if gt1 {
results.Add(cID)
}
case RANGE:
if (eq1 || gt1) && (eq2 || lt2) {
results.Add(cID)
}
default:
panic(fmt.Sprintf("Unknown operation [%v]", e.op))
}
}
resultsChan <- results
}
// MinMax - Find minimum or maximum value.
func (b *BSI) MinMax(parallelism int, op Operation, foundSet *roaring.Bitmap) int64 {
var n int = parallelism
if n == 0 {
n = runtime.NumCPU()
}
resultsChan := make(chan int64, n)
card := foundSet.GetCardinality()
x := card / uint64(n)
remainder := card - (x * uint64(n))
var batch []uint32
var wg sync.WaitGroup
iter := foundSet.ManyIterator()
for i := 0; i < n; i++ {
if i == n-1 {
batch = make([]uint32, x+remainder)
} else {
batch = make([]uint32, x)
}
iter.NextMany(batch)
wg.Add(1)
go b.minOrMax(op, batch, resultsChan, &wg)
}
wg.Wait()
close(resultsChan)
var minMax int64
if op == MAX {
minMax = Min64BitSigned
} else {
minMax = Max64BitSigned
}
for val := range resultsChan {
if (op == MAX && val > minMax) || (op == MIN && val < minMax) {
minMax = val
}
}
return minMax
}
func (b *BSI) minOrMax(op Operation, batch []uint32, resultsChan chan int64, wg *sync.WaitGroup) {
defer wg.Done()
x := b.BitCount()
var value int64 = Max64BitSigned
if op == MAX {
value = Min64BitSigned
}
for i := 0; i < len(batch); i++ {
cID := batch[i]
eq := true
lt, gt := false, false
j := b.BitCount() - 1
var cVal int64
valueIsNegative := uint64(value)&(1<<uint64(x-1)) > 0 && bits.Len64(uint64(value)) == 64
isNegative := false
if x == 64 {
isNegative = b.bA[j].Contains(cID)
if isNegative {
cVal |= 1 << uint64(j)
}
j--
}
compValue := value
if isNegative != valueIsNegative {
compValue = ^value + 1
}
for ; j >= 0; j-- {
sliceContainsBit := b.bA[j].Contains(cID)
if sliceContainsBit {
cVal |= 1 << uint64(j)
}
if uint64(compValue)&(1<<uint64(j)) > 0 {
// BIT in value is SET
if !sliceContainsBit {
if eq {
eq = false
if op == MAX && valueIsNegative && !isNegative {
gt = true
break
}
if op == MIN && (!valueIsNegative || (valueIsNegative == isNegative)) {
lt = true
}
}
}
} else {
// BIT in value is CLEAR
if sliceContainsBit {
if eq {
eq = false
if op == MIN && isNegative && !valueIsNegative {
lt = true
}
if op == MAX && (valueIsNegative || (valueIsNegative == isNegative)) {
gt = true
}
}
}
}
}
if lt || gt {
value = cVal
}
}
resultsChan <- value
}
// Sum all values contained within the foundSet. As a convenience, the cardinality of the foundSet
// is also returned (for calculating the average).
//
func (b *BSI) Sum(foundSet *roaring.Bitmap) (sum int64, count uint64) {
count = foundSet.GetCardinality()
var wg sync.WaitGroup
for i := 0; i < b.BitCount(); i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
atomic.AddInt64(&sum, int64(foundSet.AndCardinality(b.bA[j])<<uint(j)))
}(i)
}
wg.Wait()
return
}
// Transpose calls b.IntersectAndTranspose(0, b.eBM)
func (b *BSI) Transpose() *roaring.Bitmap {
return b.IntersectAndTranspose(0, b.eBM)
}
// IntersectAndTranspose is a matrix transpose function. Return a bitmap such that the values are represented as column IDs
// in the returned bitmap. This is accomplished by iterating over the foundSet and only including
// the column IDs in the source (foundSet) as compared with this BSI. This can be useful for
// vectoring one set of integers to another.
func (b *BSI) IntersectAndTranspose(parallelism int, foundSet *roaring.Bitmap) *roaring.Bitmap {
trans := &task{bsi: b}
return parallelExecutor(parallelism, trans, transpose, foundSet)
}
func transpose(e *task, batch []uint32, resultsChan chan *roaring.Bitmap, wg *sync.WaitGroup) {
defer wg.Done()
results := roaring.NewBitmap()
if e.bsi.runOptimized {
results.RunOptimize()
}
for _, cID := range batch {
if value, ok := e.bsi.GetValue(uint64(cID)); ok {
results.Add(uint32(value))
}
}
resultsChan <- results
}
// ParOr is intended primarily to be a concatenation function to be used during bulk load operations.
// Care should be taken to make sure that columnIDs do not overlap (unless overlapping values are
// identical).
func (b *BSI) ParOr(parallelism int, bsis ...*BSI) {
// Consolidate sets
bits := len(b.bA)
for i := 0; i < len(bsis); i++ {
if len(bsis[i].bA) > bits {
bits = bsis[i].BitCount()
}
}
// Make sure we have enough bit slices
for bits > b.BitCount() {
newBm := roaring.NewBitmap()
if b.runOptimized {
newBm.RunOptimize()
}
b.bA = append(b.bA, newBm)
}
a := make([][]*roaring.Bitmap, bits)
for i := range a {
a[i] = make([]*roaring.Bitmap, 0)
for _, x := range bsis {
if len(x.bA) > i {
a[i] = append(a[i], x.bA[i])
} else {
a[i] = []*roaring.Bitmap{roaring.NewBitmap()}
if b.runOptimized {
a[i][0].RunOptimize()
}
}
}
}
// Consolidate existence bit maps
ebms := make([]*roaring.Bitmap, len(bsis))
for i := range ebms {
ebms[i] = bsis[i].eBM
}
// First merge all the bit slices from all bsi maps that exist in target
var wg sync.WaitGroup
for i := 0; i < bits; i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
x := []*roaring.Bitmap{b.bA[j]}
x = append(x, a[j]...)
b.bA[j] = roaring.ParOr(parallelism, x...)
}(i)
}
wg.Wait()
// merge all the EBM maps
x := []*roaring.Bitmap{b.eBM}
x = append(x, ebms...)
b.eBM = roaring.ParOr(parallelism, x...)
}
// UnmarshalBinary de-serializes a BSI. The value at bitData[0] is the EBM. Other indices are in least to most
// significance order starting at bitData[1] (bit position 0).
func (b *BSI) UnmarshalBinary(bitData [][]byte) error {
for i := 1; i < len(bitData); i++ {
if bitData == nil || len(bitData[i]) == 0 {
continue
}
if b.BitCount() < i {
newBm := roaring.NewBitmap()
if b.runOptimized {
newBm.RunOptimize()
}
b.bA = append(b.bA, newBm)
}
if err := b.bA[i-1].UnmarshalBinary(bitData[i]); err != nil {
return err
}
if b.runOptimized {
b.bA[i-1].RunOptimize()
}
}
// First element of bitData is the EBM
if bitData[0] == nil {
b.eBM = roaring.NewBitmap()
if b.runOptimized {
b.eBM.RunOptimize()
}
return nil
}
if err := b.eBM.UnmarshalBinary(bitData[0]); err != nil {
return err
}
if b.runOptimized {
b.eBM.RunOptimize()
}
return nil
}
// MarshalBinary serializes a BSI
func (b *BSI) MarshalBinary() ([][]byte, error) {
var err error
data := make([][]byte, b.BitCount()+1)
// Add extra element for EBM (BitCount() + 1)
for i := 1; i < b.BitCount()+1; i++ {
data[i], err = b.bA[i-1].MarshalBinary()
if err != nil {
return nil, err
}
}
// Marshal EBM
data[0], err = b.eBM.MarshalBinary()
if err != nil {
return nil, err
}
return data, nil
}
// BatchEqual returns a bitmap containing the column IDs where the values are contained within the list of values provided.
func (b *BSI) BatchEqual(parallelism int, values []int64) *roaring.Bitmap {
valMap := make(map[int64]struct{}, len(values))
for i := 0; i < len(values); i++ {
valMap[values[i]] = struct{}{}
}
comp := &task{bsi: b, values: valMap}
return parallelExecutor(parallelism, comp, batchEqual, b.eBM)
}
func batchEqual(e *task, batch []uint32, resultsChan chan *roaring.Bitmap,
wg *sync.WaitGroup) {
defer wg.Done()
results := roaring.NewBitmap()
if e.bsi.runOptimized {
results.RunOptimize()
}
for i := 0; i < len(batch); i++ {
cID := batch[i]
if value, ok := e.bsi.GetValue(uint64(cID)); ok {
if _, yes := e.values[int64(value)]; yes {
results.Add(cID)
}
}
}
resultsChan <- results
}
// ClearBits clears the bits that exist in the target if they are also in the found set.
func ClearBits(foundSet, target *roaring.Bitmap) {
iter := foundSet.Iterator()
for iter.HasNext() {
cID := iter.Next()
target.Remove(cID)
}
}
// ClearValues removes the values found in foundSet
func (b *BSI) ClearValues(foundSet *roaring.Bitmap) {
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
ClearBits(foundSet, b.eBM)
}()
for i := 0; i < b.BitCount(); i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
ClearBits(foundSet, b.bA[j])
}(i)
}
wg.Wait()
}
// NewBSIRetainSet - Construct a new BSI from a clone of existing BSI, retain only values contained in foundSet
func (b *BSI) NewBSIRetainSet(foundSet *roaring.Bitmap) *BSI {
newBSI := NewBSI(b.MaxValue, b.MinValue)
newBSI.bA = make([]*roaring.Bitmap, b.BitCount())
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
newBSI.eBM = b.eBM.Clone()
newBSI.eBM.And(foundSet)
}()
for i := 0; i < b.BitCount(); i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
newBSI.bA[j] = b.bA[j].Clone()
newBSI.bA[j].And(foundSet)
}(i)
}
wg.Wait()
return newBSI
}
// Clone performs a deep copy of BSI contents.
func (b *BSI) Clone() *BSI {
return b.NewBSIRetainSet(b.eBM)
}
// Add - In-place sum the contents of another BSI with this BSI, column wise.
func (b *BSI) Add(other *BSI) {
b.eBM.Or(other.eBM)
for i := 0; i < len(other.bA); i++ {
b.addDigit(other.bA[i], i)
}
}
func (b *BSI) addDigit(foundSet *roaring.Bitmap, i int) {
if i >= len(b.bA) {
b.bA = append(b.bA, roaring.NewBitmap())
}
carry := roaring.And(b.bA[i], foundSet)
b.bA[i].Xor(foundSet)
if !carry.IsEmpty() {
if i+1 >= len(b.bA) {
b.bA = append(b.bA, roaring.NewBitmap())
}
b.addDigit(carry, i+1)
}
}
// TransposeWithCounts is a matrix transpose function that returns a BSI that has a columnID system defined by the values
// contained within the input BSI. Given that, for BSIs, different columnIDs can have the same value, TransposeWithCounts
// is useful for situations where there is a one-to-many relationship between the vectored integer sets. The resulting BSI
// contains the number of times a particular value appeared in the input BSI as an integer count.
//
func (b *BSI) TransposeWithCounts(parallelism int, foundSet *roaring.Bitmap) *BSI {
return parallelExecutorBSIResults(parallelism, b, transposeWithCounts, foundSet, true)
}
func transposeWithCounts(input *BSI, batch []uint32, resultsChan chan *BSI, wg *sync.WaitGroup) {
defer wg.Done()
results := NewDefaultBSI()
if input.runOptimized {
results.RunOptimize()
}
for _, cID := range batch {
if value, ok := input.GetValue(uint64(cID)); ok {
if val, ok2 := results.GetValue(uint64(value)); !ok2 {
results.SetValue(uint64(value), 1)
} else {
val++
results.SetValue(uint64(value), val)
}
}
}
resultsChan <- results
}
// Increment - In-place increment of values in a BSI. The foundSet selects the columns to increment.
func (b *BSI) Increment(foundSet *roaring.Bitmap) {
b.addDigit(foundSet, 0)
}
// IncrementAll - In-place increment of all values in a BSI.
func (b *BSI) IncrementAll() {
b.Increment(b.GetExistenceBitmap())
}
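
Below is a hypothetical usage sketch of the BSI API vendored above (not part of the diff itself). It assumes the sketch compiles in the same package as the BSI implementation, since the file's path header is not shown here; the expected outputs in the comments follow from the SetValue/GetValue/CompareValue/Sum definitions above.

```go
// Hypothetical usage sketch; assumes it lives alongside the BSI implementation above.
package roaring

import "fmt"

func ExampleBSI_basicUsage() {
	bsi := NewDefaultBSI() // auto-sized bit slices

	bsi.SetValue(1, 50)  // column 1 -> 50
	bsi.SetValue(2, 150) // column 2 -> 150
	bsi.SetValue(3, 220) // column 3 -> 220

	if v, ok := bsi.GetValue(2); ok {
		fmt.Println("column 2 =", v) // column 2 = 150
	}

	// Columns whose value lies in [0, 100]. Parallelism 0 means "use all CPUs";
	// a nil foundSet means "consider every existing column".
	inRange := bsi.CompareValue(0, RANGE, 0, 100, nil)
	fmt.Println("in [0,100]:", inRange.ToArray()) // in [0,100]: [1]

	sum, count := bsi.Sum(bsi.GetExistenceBitmap())
	fmt.Println("sum =", sum, "count =", count) // sum = 420 count = 3
}
```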

18
vendor/github.com/RoaringBitmap/roaring/CONTRIBUTORS generated vendored Normal file

@@ -0,0 +1,18 @@
# This is the official list of roaring contributors
Todd Gruben (@tgruben),
Daniel Lemire (@lemire),
Elliot Murphy (@statik),
Bob Potter (@bpot),
Tyson Maly (@tvmaly),
Will Glynn (@willglynn),
Brent Pedersen (@brentp),
Jason E. Aten (@glycerine),
Vali Malinoiu (@0x4139),
Forud Ghafouri (@fzerorubigd),
Joe Nall (@joenall),
(@fredim),
Edd Robinson (@e-dard),
Alexander Petrov (@alldroll),
Guy Molinari (@guymolinari),
Ling Jin (@JinLingChristopher)

235
vendor/github.com/RoaringBitmap/roaring/LICENSE generated vendored Normal file

@@ -0,0 +1,235 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2016 by the authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================================================
Portions of runcontainer.go are from the Go standard library, which is licensed
under:
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

202
vendor/github.com/RoaringBitmap/roaring/LICENSE-2.0.txt generated vendored Normal file

@@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2016 by the authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

107
vendor/github.com/RoaringBitmap/roaring/Makefile generated vendored Normal file

@@ -0,0 +1,107 @@
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke ser fetch-real-roaring-datasets
# Display general help about this command
help:
@echo ""
@echo "The following commands are available:"
@echo ""
@echo " make qa : Run all the tests"
@echo " make test : Run the unit tests"
@echo ""
@echo " make format : Format the source code"
@echo " make fmtcheck : Check if the source code has been formatted"
@echo " make vet : Check for suspicious constructs"
@echo " make lint : Check for style errors"
@echo ""
@echo " make deps : Get the dependencies"
@echo " make clean : Remove any build artifact"
@echo " make nuke : Deletes any intermediate file"
@echo ""
@echo " make fuzz-smat : Fuzzy testing with smat"
@echo " make fuzz-stream : Fuzzy testing with stream deserialization"
@echo " make fuzz-buffer : Fuzzy testing with buffer deserialization"
@echo ""
# Alias for help target
all: help
test:
go test
go test -race -run TestConcurrent*
# Format the source code
format:
@find ./ -type f -name "*.go" -exec gofmt -w {} \;
# Check if the source code has been formatted
fmtcheck:
@mkdir -p target
@find ./ -type f -name "*.go" -exec gofmt -d {} \; | tee target/format.diff
@test ! -s target/format.diff || { echo "ERROR: the source code has not been formatted - please use 'make format' or 'gofmt'"; exit 1; }
# Check for syntax errors
vet:
GOPATH=$(GOPATH) go vet ./...
# Check for style errors
lint:
GOPATH=$(GOPATH) PATH=$(GOPATH)/bin:$(PATH) golint ./...
# Alias to run all quality-assurance checks
qa: fmtcheck test vet lint
# --- INSTALL ---
# Get the dependencies
deps:
GOPATH=$(GOPATH) go get github.com/stretchr/testify
GOPATH=$(GOPATH) go get github.com/bits-and-blooms/bitset
GOPATH=$(GOPATH) go get github.com/golang/lint/golint
GOPATH=$(GOPATH) go get github.com/mschoch/smat
GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz
GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz-build
GOPATH=$(GOPATH) go get github.com/glycerine/go-unsnap-stream
GOPATH=$(GOPATH) go get github.com/philhofer/fwd
GOPATH=$(GOPATH) go get github.com/jtolds/gls
fuzz-smat:
go test -tags=gofuzz -run=TestGenerateSmatCorpus
go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
fuzz-stream:
go-fuzz-build -func FuzzSerializationStream github.com/RoaringBitmap/roaring
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
fuzz-buffer:
go-fuzz-build -func FuzzSerializationBuffer github.com/RoaringBitmap/roaring
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
# Remove any build artifact
clean:
GOPATH=$(GOPATH) go clean ./...
# Deletes any intermediate file
nuke:
rm -rf ./target
GOPATH=$(GOPATH) go clean -i ./...
cover:
go test -coverprofile=coverage.out
go tool cover -html=coverage.out
fetch-real-roaring-datasets:
# pull github.com/RoaringBitmap/real-roaring-datasets -> testdata/real-roaring-datasets
git submodule init
git submodule update

405
vendor/github.com/RoaringBitmap/roaring/README.md generated vendored Normal file

@@ -0,0 +1,405 @@
roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https://travis-ci.org/RoaringBitmap/roaring) [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring/roaring64?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring/roaring64) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring)
[![Build Status](https://cloud.drone.io/api/badges/RoaringBitmap/roaring/status.svg)](https://cloud.drone.io/RoaringBitmap/roaring)
![Go-CI](https://github.com/RoaringBitmap/roaring/workflows/Go-CI/badge.svg)
![Go-ARM-CI](https://github.com/RoaringBitmap/roaring/workflows/Go-ARM-CI/badge.svg)
![Go-Windows-CI](https://github.com/RoaringBitmap/roaring/workflows/Go-Windows-CI/badge.svg)
=============
This is a Go version of the Roaring bitmap data structure.
Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing.
[lucene]: https://lucene.apache.org/
[solr]: https://lucene.apache.org/solr/
[elasticsearch]: https://www.elastic.co/products/elasticsearch
[druid]: https://druid.apache.org/
[spark]: https://spark.apache.org/
[opensearchserver]: http://www.opensearchserver.com
[cloudtorrent]: https://github.com/jpillora/cloud-torrent
[whoosh]: https://bitbucket.org/mchaput/whoosh/wiki/Home
[pilosa]: https://www.pilosa.com/
[kylin]: http://kylin.apache.org/
[pinot]: http://github.com/linkedin/pinot/wiki
[vsts]: https://www.visualstudio.com/team-services/
[atlas]: https://github.com/Netflix/atlas
Roaring bitmaps are found to work well in many important applications:
> Use Roaring for bitmap compression whenever possible. Do not use other bitmap compression methods ([Wang et al., SIGMOD 2017](http://db.ucsd.edu/wp-content/uploads/2017/03/sidm338-wangA.pdf))
The ``roaring`` Go library is used by
* [Cloud Torrent](https://github.com/jpillora/cloud-torrent)
* [runv](https://github.com/hyperhq/runv)
* [InfluxDB](https://www.influxdata.com)
* [Pilosa](https://www.pilosa.com/)
* [Bleve](http://www.blevesearch.com)
* [lindb](https://github.com/lindb/lindb)
* [Elasticell](https://github.com/deepfabric/elasticell)
* [SourceGraph](https://github.com/sourcegraph/sourcegraph)
* [M3](https://github.com/m3db/m3)
* [trident](https://github.com/NetApp/trident)
This library is used in production in several systems; it is also part of the [Awesome Go collection](https://awesome-go.com).
There are also [Java](https://github.com/RoaringBitmap/RoaringBitmap) and [C/C++](https://github.com/RoaringBitmap/CRoaring) versions. The Java, C, C++ and Go versions are binary compatible: e.g., you can save bitmaps
from a Java program and load them back in Go, and vice versa. We have a [format specification](https://github.com/RoaringBitmap/RoaringFormatSpec).
This code is licensed under Apache License, Version 2.0 (ASL2.0).
Copyright 2016-... by the authors.
When should you use a bitmap?
===================================
Sets are a fundamental abstraction in
software. They can be implemented in various
ways, as hash sets, as trees, and so forth.
In databases and search engines, sets are often an integral
part of indexes. For example, we may need to maintain a set
of all documents or rows (represented by numerical identifier)
that satisfy some property. Besides adding or removing
elements from the set, we need fast functions
to compute the intersection, the union, the difference between sets, and so on.
To implement a set
of integers, a particularly appealing strategy is the
bitmap (also called bitset or bit vector). Using n bits,
we can represent any set made of the integers from the range
[0,n): the ith bit is set to one if integer i is present in the set.
Commodity processors use words of W=32 or W=64 bits. By combining many such words, we can
support large values of n. Intersections, unions and differences can then be implemented
as bitwise AND, OR and ANDNOT operations.
More complicated set functions can also be implemented as bitwise operations.
When the bitset approach is applicable, it can be orders of
magnitude faster than other possible implementations of a set (e.g., as a hash set)
while using several times less memory.
However, a bitset, even a compressed one, is not always applicable. For example, if
you have 1000 random-looking integers, then a simple array might be the best representation.
We refer to this case as the "sparse" scenario.
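To make the word-level idea above concrete, here is a tiny self-contained illustration (an uncompressed bitset held in a single machine word, not roaring itself): set membership as bits, with intersection, union and difference computed as bitwise AND, OR and AND-NOT.
```go
package main

import "fmt"

func main() {
	// a = {1, 5, 7}, b = {5, 7, 9}, each stored as bits of a 64-bit word.
	var a, b uint64
	for _, i := range []uint{1, 5, 7} {
		a |= 1 << i
	}
	for _, i := range []uint{5, 7, 9} {
		b |= 1 << i
	}
	fmt.Printf("intersection: %b\n", a&b)  // bits 5 and 7 set
	fmt.Printf("union:        %b\n", a|b)  // bits 1, 5, 7, 9 set
	fmt.Printf("difference:   %b\n", a&^b) // only bit 1 set
}
```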
When should you use compressed bitmaps?
===================================
An uncompressed BitSet can use a lot of memory. For example, if you take a BitSet
and set the bit at position 1,000,000 to true, you have just over 100kB. That is over 100kB
to store the position of one bit. This is wasteful even if you do not care about memory:
suppose that you need to compute the intersection between this BitSet and another one
that has the bit at position 1,000,001 set to true, then you need to go through all these zeroes,
whether you like it or not. That can become very wasteful.
This being said, there are definitely cases where attempting to use compressed bitmaps is wasteful.
For example, when you have a small universe size: your bitmaps represent sets of integers
from [0,n) where n is small (e.g., n=64 or n=128). If you are able to use an uncompressed BitSet and
it does not blow up your memory usage, then compressed bitmaps are probably not useful
to you. In fact, if you do not need compression, then a BitSet offers remarkable speed.
The sparse scenario is another use case where compressed bitmaps should not be used.
Keep in mind that random-looking data is usually not compressible. E.g., if you have a small set of
32-bit random integers, it is not mathematically possible to use far less than 32 bits per integer,
and attempts at compression can be counterproductive.
How does Roaring compare with the alternatives?
==================================================
Most alternatives to Roaring are part of a larger family of compressed bitmaps that are run-length-encoded
bitmaps. They identify long runs of 1s or 0s and they represent them with a marker word.
If you have a local mix of 1s and 0s, you use an uncompressed word.
There are many formats in this family:
* Oracle's BBC is an obsolete format at this point: though it may provide good compression,
it is likely much slower than more recent alternatives due to excessive branching.
* WAH is a patented variation on BBC that provides better performance.
* Concise is a variation on the patented WAH. In some specific instances, it can compress
much better than WAH (up to 2x better), but it is generally slower.
* EWAH is both free of patent, and it is faster than all the above. On the downside, it
does not compress quite as well. It is faster because it allows some form of "skipping"
over uncompressed words. So though none of these formats are great at random access, EWAH
is better than the alternatives.
There is, however, a big problem with these formats that can hurt you badly in some cases: there is no random access. If you want to check whether a given value is present in the set, you have to start from the beginning and "uncompress" the whole thing. This means that if you want to intersect a small set with a large set, you still have to uncompress the whole large set in the worst case...
Roaring solves this problem. It works in the following manner. It divides the data into chunks of 2<sup>16</sup> integers
(e.g., [0, 2<sup>16</sup>), [2<sup>16</sup>, 2 x 2<sup>16</sup>), ...). Within a chunk, it can use an uncompressed bitmap, a simple list of integers,
or a list of runs. Whatever format it uses, they all allow you to check for the presence of any one value quickly
(e.g., with a binary search). The net result is that Roaring can compute many operations much faster than run-length-encoded
formats like WAH, EWAH, Concise... Maybe surprisingly, Roaring also generally offers better compression ratios.
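As a rough illustration of the chunking described above (this mirrors the layout in spirit only and is not the library's internal API), a 32-bit value is routed to a container by its high 16 bits, while its low 16 bits give the position inside that container:
```go
package main

import "fmt"

func main() {
	x := uint32(1000000)
	key := uint16(x >> 16)    // which 2^16-sized chunk (container) x falls into
	low := uint16(x & 0xFFFF) // position of x inside that chunk
	fmt.Printf("value %d -> container key %d, low bits %d\n", x, key, low)
	// value 1000000 -> container key 15, low bits 16960
}
```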
### References
- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience 48 (4), 2018 [arXiv:1709.07821](https://arxiv.org/abs/1709.07821)
- Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin,
Better bitmap performance with Roaring bitmaps,
Software: Practice and Experience 46 (5), 2016.
http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/realroaring2014.html
- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience 46 (11), 2016. http://arxiv.org/abs/1603.06549
### Dependencies
Dependencies are fetched automatically by giving the `-t` flag to `go get`.
They include:
- github.com/bits-and-blooms/bitset
- github.com/mschoch/smat
- github.com/glycerine/go-unsnap-stream
- github.com/philhofer/fwd
- github.com/jtolds/gls
Note that the smat library requires Go 1.6 or better.
#### Installation
- go get -t github.com/RoaringBitmap/roaring
### Example
Here is a simplified but complete example:
```go
package main

import (
	"bytes"
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	// example inspired by https://github.com/fzandona/goroar
	fmt.Println("==roaring==")
	rb1 := roaring.BitmapOf(1, 2, 3, 4, 5, 100, 1000)
	fmt.Println(rb1.String())
	rb2 := roaring.BitmapOf(3, 4, 1000)
	fmt.Println(rb2.String())
	rb3 := roaring.New()
	fmt.Println(rb3.String())
	fmt.Println("Cardinality: ", rb1.GetCardinality())
	fmt.Println("Contains 3? ", rb1.Contains(3))
	rb1.And(rb2)
	rb3.Add(1)
	rb3.Add(5)
	rb3.Or(rb1)
	// computes union of the three bitmaps in parallel using 4 workers
	roaring.ParOr(4, rb1, rb2, rb3)
	// computes intersection of the three bitmaps in parallel using 4 workers
	roaring.ParAnd(4, rb1, rb2, rb3)
	// prints 1, 3, 4, 5, 1000
	i := rb3.Iterator()
	for i.HasNext() {
		fmt.Println(i.Next())
	}
	fmt.Println()
	// next we include an example of serialization
	buf := new(bytes.Buffer)
	rb1.WriteTo(buf) // we omit error handling
	newrb := roaring.New()
	newrb.ReadFrom(buf)
	if rb1.Equals(newrb) {
		fmt.Println("I wrote the content to a byte stream and read it back.")
	}
	// you can iterate over bitmaps using ReverseIterator(), Iterator(), ManyIterator()
}
```
If you wish to use serialization and handle errors, you might want to
consider the following sample of code:
```go
rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000)
buf := new(bytes.Buffer)
_, err := rb.WriteTo(buf)
if err != nil {
	t.Errorf("Failed writing")
}
newrb := New()
_, err = newrb.ReadFrom(buf)
if err != nil {
	t.Errorf("Failed reading")
}
if !rb.Equals(newrb) {
	t.Errorf("Cannot retrieve serialized version")
}
```
Given N integers in [0,x), the serialized size in bytes of
a Roaring bitmap should never exceed this bound:
`` 8 + 9 * ((long)x+65535)/65536 + 2 * N ``
That is, given a fixed overhead for the universe size (x), Roaring
bitmaps never use more than 2 bytes per integer. You can call
``BoundSerializedSizeInBytes`` for a more precise estimate.
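As a sanity check, the bound above is easy to compute yourself; the helper below is a minimal sketch (the function name is made up, and for a precise figure you would call ``BoundSerializedSizeInBytes`` instead):
```go
package main

import "fmt"

// serializedSizeBound evaluates the documented worst-case bound (in bytes)
// for a Roaring bitmap holding n integers drawn from [0, x).
// Illustrative only; prefer BoundSerializedSizeInBytes for a tighter estimate.
func serializedSizeBound(n, x uint64) uint64 {
	return 8 + 9*((x+65535)/65536) + 2*n
}

func main() {
	// bound for 1000 values in [0, 2^20): 8 + 9*16 + 2000 = 2152 bytes
	fmt.Println(serializedSizeBound(1000, 1<<20))
}
```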
### 64-bit Roaring
By default, roaring is used to store unsigned 32-bit integers. However, we also offer
an extension dedicated to 64-bit integers. It supports roughly the same functions:
```go
package main

import (
	"bytes"
	"fmt"

	"github.com/RoaringBitmap/roaring/roaring64"
)

func main() {
	// example inspired by https://github.com/fzandona/goroar
	fmt.Println("==roaring64==")
	rb1 := roaring64.BitmapOf(1, 2, 3, 4, 5, 100, 1000)
	fmt.Println(rb1.String())
	rb2 := roaring64.BitmapOf(3, 4, 1000)
	fmt.Println(rb2.String())
	rb3 := roaring64.New()
	fmt.Println(rb3.String())
	fmt.Println("Cardinality: ", rb1.GetCardinality())
	fmt.Println("Contains 3? ", rb1.Contains(3))
	rb1.And(rb2)
	rb3.Add(1)
	rb3.Add(5)
	rb3.Or(rb1)
	// prints 1, 3, 4, 5, 1000
	i := rb3.Iterator()
	for i.HasNext() {
		fmt.Println(i.Next())
	}
	fmt.Println()
	// next we include an example of serialization
	buf := new(bytes.Buffer)
	rb1.WriteTo(buf) // we omit error handling
	newrb := roaring64.New()
	newrb.ReadFrom(buf)
	if rb1.Equals(newrb) {
		fmt.Println("I wrote the content to a byte stream and read it back.")
	}
	// you can iterate over bitmaps using ReverseIterator(), Iterator(), ManyIterator()
}
```
Only the 32-bit roaring format is standard and interoperable between Java, C++, C and Go. There is no guarantee that the 64-bit versions are compatible.
### Documentation
Current documentation is available at http://godoc.org/github.com/RoaringBitmap/roaring and http://godoc.org/github.com/RoaringBitmap/roaring64
### Goroutine safety
In general, it is not safe to access
the same bitmaps from different goroutines--they are left
unsynchronized for performance. Should you want to access
a Bitmap from more than one goroutine, you should
provide synchronization. Typically this is done by using channels to pass
the *Bitmap around (in Go style; so there is only ever one owner),
or by using `sync.Mutex` to serialize operations on Bitmaps.
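For example, one possible (purely illustrative, not library-provided) way to share a bitmap across goroutines is to wrap it with a mutex:
```go
package main

import (
	"fmt"
	"sync"

	"github.com/RoaringBitmap/roaring"
)

// SafeBitmap is a minimal sketch of guarding a roaring.Bitmap with a mutex
// so that several goroutines can add values concurrently. It is not part of
// the roaring library.
type SafeBitmap struct {
	mu sync.Mutex
	bm *roaring.Bitmap
}

func NewSafeBitmap() *SafeBitmap { return &SafeBitmap{bm: roaring.New()} }

func (s *SafeBitmap) Add(x uint32) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.bm.Add(x)
}

func (s *SafeBitmap) Cardinality() uint64 {
	s.mu.Lock()
	defer s.mu.Unlock()
	return s.bm.GetCardinality()
}

func main() {
	s := NewSafeBitmap()
	var wg sync.WaitGroup
	for i := uint32(0); i < 4; i++ {
		wg.Add(1)
		go func(base uint32) {
			defer wg.Done()
			for j := uint32(0); j < 1000; j++ {
				s.Add(base*1000 + j)
			}
		}(i)
	}
	wg.Wait()
	fmt.Println(s.Cardinality()) // 4000
}
```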
### Coverage
We test our software. For a report on our test coverage, see
https://coveralls.io/github/RoaringBitmap/roaring?branch=master
### Benchmark
Type
go test -bench Benchmark -run -
To run benchmarks on [Real Roaring Datasets](https://github.com/RoaringBitmap/real-roaring-datasets)
run the following:
```sh
go get github.com/RoaringBitmap/real-roaring-datasets
BENCH_REAL_DATA=1 go test -bench BenchmarkRealData -run -
```
### Iterative use
You can use roaring with gore:
- go get -u github.com/motemen/gore
- Make sure that ``$GOPATH/bin`` is in your ``$PATH``.
- go get github.com/RoaringBitmap/roaring
```
$ gore
gore version 0.2.6 :help for help
gore> :import github.com/RoaringBitmap/roaring
gore> x:=roaring.New()
gore> x.Add(1)
gore> x.String()
"{1}"
```
### Fuzzy testing
You can help us test the library further with fuzzy testing:
go get github.com/dvyukov/go-fuzz/go-fuzz
go get github.com/dvyukov/go-fuzz/go-fuzz-build
go test -tags=gofuzz -run=TestGenerateSmatCorpus
go-fuzz-build github.com/RoaringBitmap/roaring
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200 -func FuzzSmat
Let it run, and if the # of crashers is > 0, check out the reports in
the workdir where you should be able to find the panic goroutine stack
traces.
You may also replace `-func FuzzSmat` by `-func FuzzSerializationBuffer` or `-func FuzzSerializationStream`.
### Alternative in Go
There is a Go version wrapping the C/C++ implementation https://github.com/RoaringBitmap/gocroaring
For an alternative implementation in Go, see https://github.com/fzandona/goroar
The two versions were written independently.
### Mailing list/discussion group
https://groups.google.com/forum/#!forum/roaring-bitmaps

1022
vendor/github.com/RoaringBitmap/roaring/arraycontainer.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

11
vendor/github.com/RoaringBitmap/roaring/clz.go generated vendored Normal file
View File

@@ -0,0 +1,11 @@
// +build go1.9
// "go1.9", from Go version 1.9 onward
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
package roaring
import "math/bits"
func countLeadingZeros(x uint64) int {
return bits.LeadingZeros64(x)
}

36
vendor/github.com/RoaringBitmap/roaring/clz_compat.go generated vendored Normal file
View File

@@ -0,0 +1,36 @@
// +build !go1.9
package roaring
// countLeadingZeros returns the number of consecutive most significant zero
// bits of x.
func countLeadingZeros(i uint64) int {
if i == 0 {
return 64
}
n := 1
x := uint32(i >> 32)
if x == 0 {
n += 32
x = uint32(i)
}
if (x >> 16) == 0 {
n += 16
x <<= 16
}
if (x >> 24) == 0 {
n += 8
x <<= 8
}
if x>>28 == 0 {
n += 4
x <<= 4
}
if x>>30 == 0 {
n += 2
x <<= 2
}
n -= int(x >> 31)
return n
}

11
vendor/github.com/RoaringBitmap/roaring/ctz.go generated vendored Normal file
View File

@@ -0,0 +1,11 @@
// +build go1.9
// "go1.9", from Go version 1.9 onward
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
package roaring
import "math/bits"
func countTrailingZeros(x uint64) int {
return bits.TrailingZeros64(x)
}

71
vendor/github.com/RoaringBitmap/roaring/ctz_compat.go generated vendored Normal file
View File

@@ -0,0 +1,71 @@
// +build !go1.9
package roaring
// Reuse of portions of go/src/math/big standard lib code
// under this license:
/*
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
const deBruijn32 = 0x077CB531
var deBruijn32Lookup = []byte{
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
}
const deBruijn64 = 0x03f79d71b4ca8b09
var deBruijn64Lookup = []byte{
0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
}
// trailingZeroBits returns the number of consecutive least significant zero
// bits of x.
func countTrailingZeros(x uint64) int {
// x & -x leaves only the right-most bit set in the word. Let k be the
// index of that bit. Since only a single bit is set, the value is two
// to the power of k. Multiplying by a power of two is equivalent to
// left shifting, in this case by k bits. The de Bruijn constant is
// such that all six bit, consecutive substrings are distinct.
// Therefore, if we have a left shifted version of this constant we can
// find by how many bits it was shifted by looking at which six bit
// substring ended up at the top of the word.
// (Knuth, volume 4, section 7.3.1)
if x == 0 {
// We have to special case 0; the formula
// below doesn't work for 0.
return 64
}
return int(deBruijn64Lookup[((x&-x)*(deBruijn64))>>58])
}

View File

@@ -0,0 +1,309 @@
package roaring
import (
"container/heap"
)
// Or function that requires repairAfterLazy
func lazyOR(x1, x2 *Bitmap) *Bitmap {
answer := NewBitmap()
pos1 := 0
pos2 := 0
length1 := x1.highlowcontainer.size()
length2 := x2.highlowcontainer.size()
main:
for (pos1 < length1) && (pos2 < length2) {
s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
for {
if s1 < s2 {
answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1)
pos1++
if pos1 == length1 {
break main
}
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
} else if s1 > s2 {
answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2)
pos2++
if pos2 == length2 {
break main
}
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
} else {
c1 := x1.highlowcontainer.getContainerAtIndex(pos1)
answer.highlowcontainer.appendContainer(s1, c1.lazyOR(x2.highlowcontainer.getContainerAtIndex(pos2)), false)
pos1++
pos2++
if (pos1 == length1) || (pos2 == length2) {
break main
}
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
}
}
}
if pos1 == length1 {
answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
} else if pos2 == length2 {
answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1)
}
return answer
}
// In-place Or function that requires repairAfterLazy
func (x1 *Bitmap) lazyOR(x2 *Bitmap) *Bitmap {
pos1 := 0
pos2 := 0
length1 := x1.highlowcontainer.size()
length2 := x2.highlowcontainer.size()
main:
for (pos1 < length1) && (pos2 < length2) {
s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
for {
if s1 < s2 {
pos1++
if pos1 == length1 {
break main
}
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
} else if s1 > s2 {
x1.highlowcontainer.insertNewKeyValueAt(pos1, s2, x2.highlowcontainer.getContainerAtIndex(pos2).clone())
pos2++
pos1++
length1++
if pos2 == length2 {
break main
}
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
} else {
c1 := x1.highlowcontainer.getWritableContainerAtIndex(pos1)
x1.highlowcontainer.containers[pos1] = c1.lazyIOR(x2.highlowcontainer.getContainerAtIndex(pos2))
x1.highlowcontainer.needCopyOnWrite[pos1] = false
pos1++
pos2++
if (pos1 == length1) || (pos2 == length2) {
break main
}
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
}
}
}
if pos1 == length1 {
x1.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
}
return x1
}
// to be called after lazy aggregates
func (x1 *Bitmap) repairAfterLazy() {
for pos := 0; pos < x1.highlowcontainer.size(); pos++ {
c := x1.highlowcontainer.getContainerAtIndex(pos)
switch c.(type) {
case *bitmapContainer:
if c.(*bitmapContainer).cardinality == invalidCardinality {
c = x1.highlowcontainer.getWritableContainerAtIndex(pos)
c.(*bitmapContainer).computeCardinality()
if c.(*bitmapContainer).getCardinality() <= arrayDefaultMaxSize {
x1.highlowcontainer.setContainerAtIndex(pos, c.(*bitmapContainer).toArrayContainer())
} else if c.(*bitmapContainer).isFull() {
x1.highlowcontainer.setContainerAtIndex(pos, newRunContainer16Range(0, MaxUint16))
}
}
}
}
}
// FastAnd computes the intersection between many bitmaps quickly
// Compared to the And function, it can take many bitmaps as input, thus saving the trouble
// of manually calling "And" many times.
func FastAnd(bitmaps ...*Bitmap) *Bitmap {
if len(bitmaps) == 0 {
return NewBitmap()
} else if len(bitmaps) == 1 {
return bitmaps[0].Clone()
}
answer := And(bitmaps[0], bitmaps[1])
for _, bm := range bitmaps[2:] {
answer.And(bm)
}
return answer
}
// FastOr computes the union of many bitmaps in a single call, saving the trouble
// of calling Or repeatedly. It might also be faster than calling Or repeatedly.
func FastOr(bitmaps ...*Bitmap) *Bitmap {
if len(bitmaps) == 0 {
return NewBitmap()
} else if len(bitmaps) == 1 {
return bitmaps[0].Clone()
}
answer := lazyOR(bitmaps[0], bitmaps[1])
for _, bm := range bitmaps[2:] {
answer = answer.lazyOR(bm)
}
// here is where repairAfterLazy is called.
answer.repairAfterLazy()
return answer
}
// HeapOr computes the union between many bitmaps quickly using a heap.
// It might be faster than calling Or repeatedly.
func HeapOr(bitmaps ...*Bitmap) *Bitmap {
if len(bitmaps) == 0 {
return NewBitmap()
}
// TODO: for better speed, we could do the operation lazily, see Java implementation
pq := make(priorityQueue, len(bitmaps))
for i, bm := range bitmaps {
pq[i] = &item{bm, i}
}
heap.Init(&pq)
for pq.Len() > 1 {
x1 := heap.Pop(&pq).(*item)
x2 := heap.Pop(&pq).(*item)
heap.Push(&pq, &item{Or(x1.value, x2.value), 0})
}
return heap.Pop(&pq).(*item).value
}
// HeapXor computes the symmetric difference between many bitmaps quickly (as opposed to calling Xor repeatedly).
// Internally, this function uses a heap.
// It might be faster than calling Xor repeatedly.
func HeapXor(bitmaps ...*Bitmap) *Bitmap {
if len(bitmaps) == 0 {
return NewBitmap()
}
pq := make(priorityQueue, len(bitmaps))
for i, bm := range bitmaps {
pq[i] = &item{bm, i}
}
heap.Init(&pq)
for pq.Len() > 1 {
x1 := heap.Pop(&pq).(*item)
x2 := heap.Pop(&pq).(*item)
heap.Push(&pq, &item{Xor(x1.value, x2.value), 0})
}
return heap.Pop(&pq).(*item).value
}
// AndAny provides a result equivalent to x1.And(FastOr(bitmaps)).
// It's optimized to minimize allocations. It also might be faster than separate calls.
func (x1 *Bitmap) AndAny(bitmaps ...*Bitmap) {
if len(bitmaps) == 0 {
return
} else if len(bitmaps) == 1 {
x1.And(bitmaps[0])
return
}
type withPos struct {
bitmap *roaringArray
pos int
key uint16
}
filters := make([]withPos, 0, len(bitmaps))
for _, b := range bitmaps {
if b.highlowcontainer.size() > 0 {
filters = append(filters, withPos{
bitmap: &b.highlowcontainer,
pos: 0,
key: b.highlowcontainer.getKeyAtIndex(0),
})
}
}
basePos := 0
intersections := 0
keyContainers := make([]container, 0, len(filters))
var (
tmpArray *arrayContainer
tmpBitmap *bitmapContainer
minNextKey uint16
)
for basePos < x1.highlowcontainer.size() && len(filters) > 0 {
baseKey := x1.highlowcontainer.getKeyAtIndex(basePos)
// accumulate containers for current key, find next minimal key in filters
// and exclude filters that do not have related values anymore
i := 0
maxPossibleOr := 0
minNextKey = MaxUint16
for _, f := range filters {
if f.key < baseKey {
f.pos = f.bitmap.advanceUntil(baseKey, f.pos)
if f.pos == f.bitmap.size() {
continue
}
f.key = f.bitmap.getKeyAtIndex(f.pos)
}
if f.key == baseKey {
cont := f.bitmap.getContainerAtIndex(f.pos)
keyContainers = append(keyContainers, cont)
maxPossibleOr += cont.getCardinality()
f.pos++
if f.pos == f.bitmap.size() {
continue
}
f.key = f.bitmap.getKeyAtIndex(f.pos)
}
minNextKey = minOfUint16(minNextKey, f.key)
filters[i] = f
i++
}
filters = filters[:i]
if len(keyContainers) == 0 {
basePos = x1.highlowcontainer.advanceUntil(minNextKey, basePos)
continue
}
var ored container
if len(keyContainers) == 1 {
ored = keyContainers[0]
} else {
//TODO: special case for run containers?
if maxPossibleOr > arrayDefaultMaxSize {
if tmpBitmap == nil {
tmpBitmap = newBitmapContainer()
}
tmpBitmap.resetTo(keyContainers[0])
ored = tmpBitmap
} else {
if tmpArray == nil {
tmpArray = newArrayContainerCapacity(maxPossibleOr)
}
tmpArray.realloc(maxPossibleOr)
tmpArray.resetTo(keyContainers[0])
ored = tmpArray
}
for _, c := range keyContainers[1:] {
ored = ored.ior(c)
}
}
result := x1.highlowcontainer.getWritableContainerAtIndex(basePos).iand(ored)
if !result.isEmpty() {
x1.highlowcontainer.replaceKeyAndContainerAtIndex(intersections, baseKey, result, false)
intersections++
}
keyContainers = keyContainers[:0]
basePos = x1.highlowcontainer.advanceUntil(minNextKey, basePos)
}
x1.highlowcontainer.resize(intersections)
}

View File

@@ -0,0 +1,166 @@
package internal
import (
"encoding/binary"
"io"
)
// ByteInput typed interface around io.Reader or raw bytes
type ByteInput interface {
// Next returns a slice containing the next n bytes from the buffer,
// advancing the buffer as if the bytes had been returned by Read.
Next(n int) ([]byte, error)
// ReadUInt32 reads uint32 with LittleEndian order
ReadUInt32() (uint32, error)
// ReadUInt16 reads uint16 with LittleEndian order
ReadUInt16() (uint16, error)
// GetReadBytes returns read bytes
GetReadBytes() int64
// SkipBytes skips exactly n bytes
SkipBytes(n int) error
}
// NewByteInputFromReader creates reader wrapper
func NewByteInputFromReader(reader io.Reader) ByteInput {
return &ByteInputAdapter{
r: reader,
readBytes: 0,
}
}
// NewByteInput creates raw bytes wrapper
func NewByteInput(buf []byte) ByteInput {
return &ByteBuffer{
buf: buf,
off: 0,
}
}
// ByteBuffer raw bytes wrapper
type ByteBuffer struct {
buf []byte
off int
}
// Next returns a slice containing the next n bytes from the reader
// If there are fewer bytes than the given n, io.ErrUnexpectedEOF will be returned
func (b *ByteBuffer) Next(n int) ([]byte, error) {
m := len(b.buf) - b.off
if n > m {
return nil, io.ErrUnexpectedEOF
}
data := b.buf[b.off : b.off+n]
b.off += n
return data, nil
}
// ReadUInt32 reads uint32 with LittleEndian order
func (b *ByteBuffer) ReadUInt32() (uint32, error) {
if len(b.buf)-b.off < 4 {
return 0, io.ErrUnexpectedEOF
}
v := binary.LittleEndian.Uint32(b.buf[b.off:])
b.off += 4
return v, nil
}
// ReadUInt16 reads uint16 with LittleEndian order
func (b *ByteBuffer) ReadUInt16() (uint16, error) {
if len(b.buf)-b.off < 2 {
return 0, io.ErrUnexpectedEOF
}
v := binary.LittleEndian.Uint16(b.buf[b.off:])
b.off += 2
return v, nil
}
// GetReadBytes returns read bytes
func (b *ByteBuffer) GetReadBytes() int64 {
return int64(b.off)
}
// SkipBytes skips exactly n bytes
func (b *ByteBuffer) SkipBytes(n int) error {
m := len(b.buf) - b.off
if n > m {
return io.ErrUnexpectedEOF
}
b.off += n
return nil
}
// Reset resets the given buffer with a new byte slice
func (b *ByteBuffer) Reset(buf []byte) {
b.buf = buf
b.off = 0
}
// ByteInputAdapter reader wrapper
type ByteInputAdapter struct {
r io.Reader
readBytes int
}
// Next returns a slice containing the next n bytes from the buffer,
// advancing the buffer as if the bytes had been returned by Read.
func (b *ByteInputAdapter) Next(n int) ([]byte, error) {
buf := make([]byte, n)
m, err := io.ReadAtLeast(b.r, buf, n)
b.readBytes += m
if err != nil {
return nil, err
}
return buf, nil
}
// ReadUInt32 reads uint32 with LittleEndian order
func (b *ByteInputAdapter) ReadUInt32() (uint32, error) {
buf, err := b.Next(4)
if err != nil {
return 0, err
}
return binary.LittleEndian.Uint32(buf), nil
}
// ReadUInt16 reads uint16 with LittleEndian order
func (b *ByteInputAdapter) ReadUInt16() (uint16, error) {
buf, err := b.Next(2)
if err != nil {
return 0, err
}
return binary.LittleEndian.Uint16(buf), nil
}
// GetReadBytes returns read bytes
func (b *ByteInputAdapter) GetReadBytes() int64 {
return int64(b.readBytes)
}
// SkipBytes skips exactly n bytes
func (b *ByteInputAdapter) SkipBytes(n int) error {
_, err := b.Next(n)
return err
}
// Reset resets the given buffer with a new stream
func (b *ByteInputAdapter) Reset(stream io.Reader) {
b.r = stream
b.readBytes = 0
}

View File

@@ -0,0 +1,21 @@
package internal
import (
"sync"
)
var (
// ByteInputAdapterPool shared pool
ByteInputAdapterPool = sync.Pool{
New: func() interface{} {
return &ByteInputAdapter{}
},
}
// ByteBufferPool shared pool
ByteBufferPool = sync.Pool{
New: func() interface{} {
return &ByteBuffer{}
},
}
)

View File

@@ -0,0 +1,32 @@
package roaring
type manyIterable interface {
nextMany(hs uint32, buf []uint32) int
nextMany64(hs uint64, buf []uint64) int
}
func (si *shortIterator) nextMany(hs uint32, buf []uint32) int {
n := 0
l := si.loc
s := si.slice
for n < len(buf) && l < len(s) {
buf[n] = uint32(s[l]) | hs
l++
n++
}
si.loc = l
return n
}
func (si *shortIterator) nextMany64(hs uint64, buf []uint64) int {
n := 0
l := si.loc
s := si.slice
for n < len(buf) && l < len(s) {
buf[n] = uint64(s[l]) | hs
l++
n++
}
si.loc = l
return n
}

612
vendor/github.com/RoaringBitmap/roaring/parallel.go generated vendored Normal file
View File

@@ -0,0 +1,612 @@
package roaring
import (
"container/heap"
"fmt"
"runtime"
"sync"
)
var defaultWorkerCount = runtime.NumCPU()
type bitmapContainerKey struct {
key uint16
idx int
bitmap *Bitmap
}
type multipleContainers struct {
key uint16
containers []container
idx int
}
type keyedContainer struct {
key uint16
container container
idx int
}
type bitmapContainerHeap []bitmapContainerKey
func (h bitmapContainerHeap) Len() int { return len(h) }
func (h bitmapContainerHeap) Less(i, j int) bool { return h[i].key < h[j].key }
func (h bitmapContainerHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
func (h *bitmapContainerHeap) Push(x interface{}) {
// Push and Pop use pointer receivers because they modify the slice's length,
// not just its contents.
*h = append(*h, x.(bitmapContainerKey))
}
func (h *bitmapContainerHeap) Pop() interface{} {
old := *h
n := len(old)
x := old[n-1]
*h = old[0 : n-1]
return x
}
func (h bitmapContainerHeap) Peek() bitmapContainerKey {
return h[0]
}
func (h *bitmapContainerHeap) popIncrementing() (key uint16, container container) {
k := h.Peek()
key = k.key
container = k.bitmap.highlowcontainer.containers[k.idx]
newIdx := k.idx + 1
if newIdx < k.bitmap.highlowcontainer.size() {
k = bitmapContainerKey{
k.bitmap.highlowcontainer.keys[newIdx],
newIdx,
k.bitmap,
}
(*h)[0] = k
heap.Fix(h, 0)
} else {
heap.Pop(h)
}
return
}
func (h *bitmapContainerHeap) Next(containers []container) multipleContainers {
if h.Len() == 0 {
return multipleContainers{}
}
key, container := h.popIncrementing()
containers = append(containers, container)
for h.Len() > 0 && key == h.Peek().key {
_, container = h.popIncrementing()
containers = append(containers, container)
}
return multipleContainers{
key,
containers,
-1,
}
}
func newBitmapContainerHeap(bitmaps ...*Bitmap) bitmapContainerHeap {
// Initialize heap
var h bitmapContainerHeap = make([]bitmapContainerKey, 0, len(bitmaps))
for _, bitmap := range bitmaps {
if !bitmap.IsEmpty() {
key := bitmapContainerKey{
bitmap.highlowcontainer.keys[0],
0,
bitmap,
}
h = append(h, key)
}
}
heap.Init(&h)
return h
}
func repairAfterLazy(c container) container {
switch t := c.(type) {
case *bitmapContainer:
if t.cardinality == invalidCardinality {
t.computeCardinality()
}
if t.getCardinality() <= arrayDefaultMaxSize {
return t.toArrayContainer()
} else if c.(*bitmapContainer).isFull() {
return newRunContainer16Range(0, MaxUint16)
}
}
return c
}
func toBitmapContainer(c container) container {
switch t := c.(type) {
case *arrayContainer:
return t.toBitmapContainer()
case *runContainer16:
if !t.isFull() {
return t.toBitmapContainer()
}
}
return c
}
func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) {
expectedKeys := -1
appendedKeys := 0
var keys []uint16
var containers []container
for appendedKeys != expectedKeys {
select {
case item := <-resultChan:
if len(keys) <= item.idx {
keys = append(keys, make([]uint16, item.idx-len(keys)+1)...)
containers = append(containers, make([]container, item.idx-len(containers)+1)...)
}
keys[item.idx] = item.key
containers[item.idx] = item.container
appendedKeys++
case msg := <-expectedKeysChan:
expectedKeys = msg
}
}
answer := &Bitmap{
roaringArray{
make([]uint16, 0, expectedKeys),
make([]container, 0, expectedKeys),
make([]bool, 0, expectedKeys),
false,
},
}
for i := range keys {
if containers[i] != nil { // in case a resulting container was empty, see ParAnd function
answer.highlowcontainer.appendContainer(keys[i], containers[i], false)
}
}
bitmapChan <- answer
}
// ParHeapOr computes the union (OR) of all provided bitmaps in parallel,
// where the parameter "parallelism" determines how many workers are to be used
// (if it is set to 0, a default number of workers is chosen)
// ParHeapOr uses a heap to compute the union. For rare cases it might be faster than ParOr
func ParHeapOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
bitmapCount := len(bitmaps)
if bitmapCount == 0 {
return NewBitmap()
} else if bitmapCount == 1 {
return bitmaps[0].Clone()
}
if parallelism == 0 {
parallelism = defaultWorkerCount
}
h := newBitmapContainerHeap(bitmaps...)
bitmapChan := make(chan *Bitmap)
inputChan := make(chan multipleContainers, 128)
resultChan := make(chan keyedContainer, 32)
expectedKeysChan := make(chan int)
pool := sync.Pool{
New: func() interface{} {
return make([]container, 0, len(bitmaps))
},
}
orFunc := func() {
// Assumes only structs with >=2 containers are passed
for input := range inputChan {
c := toBitmapContainer(input.containers[0]).lazyOR(input.containers[1])
for _, next := range input.containers[2:] {
c = c.lazyIOR(next)
}
c = repairAfterLazy(c)
kx := keyedContainer{
input.key,
c,
input.idx,
}
resultChan <- kx
pool.Put(input.containers[:0])
}
}
go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)
for i := 0; i < parallelism; i++ {
go orFunc()
}
idx := 0
for h.Len() > 0 {
ck := h.Next(pool.Get().([]container))
if len(ck.containers) == 1 {
resultChan <- keyedContainer{
ck.key,
ck.containers[0],
idx,
}
pool.Put(ck.containers[:0])
} else {
ck.idx = idx
inputChan <- ck
}
idx++
}
expectedKeysChan <- idx
bitmap := <-bitmapChan
close(inputChan)
close(resultChan)
close(expectedKeysChan)
return bitmap
}
// ParAnd computes the intersection (AND) of all provided bitmaps in parallel,
// where the parameter "parallelism" determines how many workers are to be used
// (if it is set to 0, a default number of workers is chosen)
func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap {
bitmapCount := len(bitmaps)
if bitmapCount == 0 {
return NewBitmap()
} else if bitmapCount == 1 {
return bitmaps[0].Clone()
}
if parallelism == 0 {
parallelism = defaultWorkerCount
}
h := newBitmapContainerHeap(bitmaps...)
bitmapChan := make(chan *Bitmap)
inputChan := make(chan multipleContainers, 128)
resultChan := make(chan keyedContainer, 32)
expectedKeysChan := make(chan int)
andFunc := func() {
// Assumes only structs with >=2 containers are passed
for input := range inputChan {
c := input.containers[0].and(input.containers[1])
for _, next := range input.containers[2:] {
if c.isEmpty() {
break
}
c = c.iand(next)
}
// Send a nil explicitly if the result of the intersection is an empty container
if c.isEmpty() {
c = nil
}
kx := keyedContainer{
input.key,
c,
input.idx,
}
resultChan <- kx
}
}
go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)
for i := 0; i < parallelism; i++ {
go andFunc()
}
idx := 0
for h.Len() > 0 {
ck := h.Next(make([]container, 0, 4))
if len(ck.containers) == bitmapCount {
ck.idx = idx
inputChan <- ck
idx++
}
}
expectedKeysChan <- idx
bitmap := <-bitmapChan
close(inputChan)
close(resultChan)
close(expectedKeysChan)
return bitmap
}
// ParOr computes the union (OR) of all provided bitmaps in parallel,
// where the parameter "parallelism" determines how many workers are to be used
// (if it is set to 0, a default number of workers is chosen)
func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
var lKey uint16 = MaxUint16
var hKey uint16
bitmapsFiltered := bitmaps[:0]
for _, b := range bitmaps {
if !b.IsEmpty() {
bitmapsFiltered = append(bitmapsFiltered, b)
}
}
bitmaps = bitmapsFiltered
for _, b := range bitmaps {
lKey = minOfUint16(lKey, b.highlowcontainer.keys[0])
hKey = maxOfUint16(hKey, b.highlowcontainer.keys[b.highlowcontainer.size()-1])
}
if lKey == MaxUint16 && hKey == 0 {
return New()
} else if len(bitmaps) == 1 {
return bitmaps[0].Clone()
}
keyRange := int(hKey) - int(lKey) + 1
if keyRange == 1 {
// revert to FastOr: the keys span a single container key, so
// no container-level aggregation parallelism is achievable
return FastOr(bitmaps...)
}
if parallelism == 0 {
parallelism = defaultWorkerCount
}
var chunkSize int
var chunkCount int
if parallelism*4 > int(keyRange) {
chunkSize = 1
chunkCount = int(keyRange)
} else {
chunkCount = parallelism * 4
chunkSize = (int(keyRange) + chunkCount - 1) / chunkCount
}
if chunkCount*chunkSize < int(keyRange) {
// it's fine to panic to indicate an implementation error
panic(fmt.Sprintf("invariant check failed: chunkCount * chunkSize < keyRange, %d * %d < %d", chunkCount, chunkSize, keyRange))
}
chunks := make([]*roaringArray, chunkCount)
chunkSpecChan := make(chan parChunkSpec, minOfInt(maxOfInt(64, 2*parallelism), int(chunkCount)))
chunkChan := make(chan parChunk, minOfInt(32, int(chunkCount)))
orFunc := func() {
for spec := range chunkSpecChan {
ra := lazyOrOnRange(&bitmaps[0].highlowcontainer, &bitmaps[1].highlowcontainer, spec.start, spec.end)
for _, b := range bitmaps[2:] {
ra = lazyIOrOnRange(ra, &b.highlowcontainer, spec.start, spec.end)
}
for i, c := range ra.containers {
ra.containers[i] = repairAfterLazy(c)
}
chunkChan <- parChunk{ra, spec.idx}
}
}
for i := 0; i < parallelism; i++ {
go orFunc()
}
go func() {
for i := 0; i < chunkCount; i++ {
spec := parChunkSpec{
start: uint16(int(lKey) + i*chunkSize),
end: uint16(minOfInt(int(lKey)+(i+1)*chunkSize-1, int(hKey))),
idx: int(i),
}
chunkSpecChan <- spec
}
}()
chunksRemaining := chunkCount
for chunk := range chunkChan {
chunks[chunk.idx] = chunk.ra
chunksRemaining--
if chunksRemaining == 0 {
break
}
}
close(chunkChan)
close(chunkSpecChan)
containerCount := 0
for _, chunk := range chunks {
containerCount += chunk.size()
}
result := Bitmap{
roaringArray{
containers: make([]container, containerCount),
keys: make([]uint16, containerCount),
needCopyOnWrite: make([]bool, containerCount),
},
}
resultOffset := 0
for _, chunk := range chunks {
copy(result.highlowcontainer.containers[resultOffset:], chunk.containers)
copy(result.highlowcontainer.keys[resultOffset:], chunk.keys)
copy(result.highlowcontainer.needCopyOnWrite[resultOffset:], chunk.needCopyOnWrite)
resultOffset += chunk.size()
}
return &result
}
type parChunkSpec struct {
start uint16
end uint16
idx int
}
type parChunk struct {
ra *roaringArray
idx int
}
func (c parChunk) size() int {
return c.ra.size()
}
func parNaiveStartAt(ra *roaringArray, start uint16, last uint16) int {
for idx, key := range ra.keys {
if key >= start && key <= last {
return idx
} else if key > last {
break
}
}
return ra.size()
}
func lazyOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
answer := newRoaringArray()
length1 := ra1.size()
length2 := ra2.size()
idx1 := parNaiveStartAt(ra1, start, last)
idx2 := parNaiveStartAt(ra2, start, last)
var key1 uint16
var key2 uint16
if idx1 < length1 && idx2 < length2 {
key1 = ra1.getKeyAtIndex(idx1)
key2 = ra2.getKeyAtIndex(idx2)
for key1 <= last && key2 <= last {
if key1 < key2 {
answer.appendCopy(*ra1, idx1)
idx1++
if idx1 == length1 {
break
}
key1 = ra1.getKeyAtIndex(idx1)
} else if key1 > key2 {
answer.appendCopy(*ra2, idx2)
idx2++
if idx2 == length2 {
break
}
key2 = ra2.getKeyAtIndex(idx2)
} else {
c1 := ra1.getFastContainerAtIndex(idx1, false)
answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false)
idx1++
idx2++
if idx1 == length1 || idx2 == length2 {
break
}
key1 = ra1.getKeyAtIndex(idx1)
key2 = ra2.getKeyAtIndex(idx2)
}
}
}
if idx2 < length2 {
key2 = ra2.getKeyAtIndex(idx2)
for key2 <= last {
answer.appendCopy(*ra2, idx2)
idx2++
if idx2 == length2 {
break
}
key2 = ra2.getKeyAtIndex(idx2)
}
}
if idx1 < length1 {
key1 = ra1.getKeyAtIndex(idx1)
for key1 <= last {
answer.appendCopy(*ra1, idx1)
idx1++
if idx1 == length1 {
break
}
key1 = ra1.getKeyAtIndex(idx1)
}
}
return answer
}
func lazyIOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
length1 := ra1.size()
length2 := ra2.size()
idx1 := 0
idx2 := parNaiveStartAt(ra2, start, last)
var key1 uint16
var key2 uint16
if idx1 < length1 && idx2 < length2 {
key1 = ra1.getKeyAtIndex(idx1)
key2 = ra2.getKeyAtIndex(idx2)
for key1 <= last && key2 <= last {
if key1 < key2 {
idx1++
if idx1 >= length1 {
break
}
key1 = ra1.getKeyAtIndex(idx1)
} else if key1 > key2 {
ra1.insertNewKeyValueAt(idx1, key2, ra2.getContainerAtIndex(idx2))
ra1.needCopyOnWrite[idx1] = true
idx2++
idx1++
length1++
if idx2 >= length2 {
break
}
key2 = ra2.getKeyAtIndex(idx2)
} else {
c1 := ra1.getFastContainerAtIndex(idx1, true)
ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2))
ra1.needCopyOnWrite[idx1] = false
idx1++
idx2++
if idx1 >= length1 || idx2 >= length2 {
break
}
key1 = ra1.getKeyAtIndex(idx1)
key2 = ra2.getKeyAtIndex(idx2)
}
}
}
if idx2 < length2 {
key2 = ra2.getKeyAtIndex(idx2)
for key2 <= last {
ra1.appendCopy(*ra2, idx2)
idx2++
if idx2 >= length2 {
break
}
key2 = ra2.getKeyAtIndex(idx2)
}
}
return ra1
}

11
vendor/github.com/RoaringBitmap/roaring/popcnt.go generated vendored Normal file
View File

@@ -0,0 +1,11 @@
// +build go1.9
// "go1.9", from Go version 1.9 onward
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
package roaring
import "math/bits"
func popcount(x uint64) uint64 {
return uint64(bits.OnesCount64(x))
}

103
vendor/github.com/RoaringBitmap/roaring/popcnt_amd64.s generated vendored Normal file
View File

@@ -0,0 +1,103 @@
// +build amd64,!appengine,!go1.9
TEXT ·hasAsm(SB),4,$0-1
MOVQ $1, AX
CPUID
SHRQ $23, CX
ANDQ $1, CX
MOVB CX, ret+0(FP)
RET
#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2
TEXT ·popcntSliceAsm(SB),4,$0-32
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntSliceEnd
popcntSliceLoop:
BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX
ADDQ DX, AX
ADDQ $8, SI
LOOP popcntSliceLoop
popcntSliceEnd:
MOVQ AX, ret+24(FP)
RET
TEXT ·popcntMaskSliceAsm(SB),4,$0-56
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntMaskSliceEnd
MOVQ m+24(FP), DI
popcntMaskSliceLoop:
MOVQ (DI), DX
NOTQ DX
ANDQ (SI), DX
POPCNTQ_DX_DX
ADDQ DX, AX
ADDQ $8, SI
ADDQ $8, DI
LOOP popcntMaskSliceLoop
popcntMaskSliceEnd:
MOVQ AX, ret+48(FP)
RET
TEXT ·popcntAndSliceAsm(SB),4,$0-56
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntAndSliceEnd
MOVQ m+24(FP), DI
popcntAndSliceLoop:
MOVQ (DI), DX
ANDQ (SI), DX
POPCNTQ_DX_DX
ADDQ DX, AX
ADDQ $8, SI
ADDQ $8, DI
LOOP popcntAndSliceLoop
popcntAndSliceEnd:
MOVQ AX, ret+48(FP)
RET
TEXT ·popcntOrSliceAsm(SB),4,$0-56
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntOrSliceEnd
MOVQ m+24(FP), DI
popcntOrSliceLoop:
MOVQ (DI), DX
ORQ (SI), DX
POPCNTQ_DX_DX
ADDQ DX, AX
ADDQ $8, SI
ADDQ $8, DI
LOOP popcntOrSliceLoop
popcntOrSliceEnd:
MOVQ AX, ret+48(FP)
RET
TEXT ·popcntXorSliceAsm(SB),4,$0-56
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntXorSliceEnd
MOVQ m+24(FP), DI
popcntXorSliceLoop:
MOVQ (DI), DX
XORQ (SI), DX
POPCNTQ_DX_DX
ADDQ DX, AX
ADDQ $8, SI
ADDQ $8, DI
LOOP popcntXorSliceLoop
popcntXorSliceEnd:
MOVQ AX, ret+48(FP)
RET

67
vendor/github.com/RoaringBitmap/roaring/popcnt_asm.go generated vendored Normal file
View File

@@ -0,0 +1,67 @@
// +build amd64,!appengine,!go1.9
package roaring
// *** the following functions are defined in popcnt_amd64.s
//go:noescape
func hasAsm() bool
// useAsm is a flag used to select the GO or ASM implementation of the popcnt function
var useAsm = hasAsm()
//go:noescape
func popcntSliceAsm(s []uint64) uint64
//go:noescape
func popcntMaskSliceAsm(s, m []uint64) uint64
//go:noescape
func popcntAndSliceAsm(s, m []uint64) uint64
//go:noescape
func popcntOrSliceAsm(s, m []uint64) uint64
//go:noescape
func popcntXorSliceAsm(s, m []uint64) uint64
func popcntSlice(s []uint64) uint64 {
if useAsm {
return popcntSliceAsm(s)
}
return popcntSliceGo(s)
}
func popcntMaskSlice(s, m []uint64) uint64 {
if useAsm {
return popcntMaskSliceAsm(s, m)
}
return popcntMaskSliceGo(s, m)
}
func popcntAndSlice(s, m []uint64) uint64 {
if useAsm {
return popcntAndSliceAsm(s, m)
}
return popcntAndSliceGo(s, m)
}
func popcntOrSlice(s, m []uint64) uint64 {
if useAsm {
return popcntOrSliceAsm(s, m)
}
return popcntOrSliceGo(s, m)
}
func popcntXorSlice(s, m []uint64) uint64 {
if useAsm {
return popcntXorSliceAsm(s, m)
}
return popcntXorSliceGo(s, m)
}

View File

@@ -0,0 +1,17 @@
// +build !go1.9
package roaring
// bit population count, taken from
// https://code.google.com/p/go/issues/detail?id=4988#c11
// credit: https://code.google.com/u/arnehormann/
// credit: https://play.golang.org/p/U7SogJ7psJ
// credit: http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
func popcount(x uint64) uint64 {
x -= (x >> 1) & 0x5555555555555555
x = (x>>2)&0x3333333333333333 + x&0x3333333333333333
x += x >> 4
x &= 0x0f0f0f0f0f0f0f0f
x *= 0x0101010101010101
return x >> 56
}

View File

@@ -0,0 +1,23 @@
// +build !amd64 appengine go1.9
package roaring
func popcntSlice(s []uint64) uint64 {
return popcntSliceGo(s)
}
func popcntMaskSlice(s, m []uint64) uint64 {
return popcntMaskSliceGo(s, m)
}
func popcntAndSlice(s, m []uint64) uint64 {
return popcntAndSliceGo(s, m)
}
func popcntOrSlice(s, m []uint64) uint64 {
return popcntOrSliceGo(s, m)
}
func popcntXorSlice(s, m []uint64) uint64 {
return popcntXorSliceGo(s, m)
}

View File

@@ -0,0 +1,41 @@
package roaring
func popcntSliceGo(s []uint64) uint64 {
cnt := uint64(0)
for _, x := range s {
cnt += popcount(x)
}
return cnt
}
func popcntMaskSliceGo(s, m []uint64) uint64 {
cnt := uint64(0)
for i := range s {
cnt += popcount(s[i] &^ m[i])
}
return cnt
}
func popcntAndSliceGo(s, m []uint64) uint64 {
cnt := uint64(0)
for i := range s {
cnt += popcount(s[i] & m[i])
}
return cnt
}
func popcntOrSliceGo(s, m []uint64) uint64 {
cnt := uint64(0)
for i := range s {
cnt += popcount(s[i] | m[i])
}
return cnt
}
func popcntXorSliceGo(s, m []uint64) uint64 {
cnt := uint64(0)
for i := range s {
cnt += popcount(s[i] ^ m[i])
}
return cnt
}

View File

@@ -0,0 +1,101 @@
package roaring
import "container/heap"
/////////////
// The priorityQueue is used to keep Bitmaps sorted.
////////////
type item struct {
value *Bitmap
index int
}
type priorityQueue []*item
func (pq priorityQueue) Len() int { return len(pq) }
func (pq priorityQueue) Less(i, j int) bool {
return pq[i].value.GetSizeInBytes() < pq[j].value.GetSizeInBytes()
}
func (pq priorityQueue) Swap(i, j int) {
pq[i], pq[j] = pq[j], pq[i]
pq[i].index = i
pq[j].index = j
}
func (pq *priorityQueue) Push(x interface{}) {
n := len(*pq)
item := x.(*item)
item.index = n
*pq = append(*pq, item)
}
func (pq *priorityQueue) Pop() interface{} {
old := *pq
n := len(old)
item := old[n-1]
item.index = -1 // for safety
*pq = old[0 : n-1]
return item
}
func (pq *priorityQueue) update(item *item, value *Bitmap) {
item.value = value
heap.Fix(pq, item.index)
}
/////////////
// The containerPriorityQueue is used to keep the containers of various Bitmaps sorted.
////////////
type containeritem struct {
value *Bitmap
keyindex int
index int
}
type containerPriorityQueue []*containeritem
func (pq containerPriorityQueue) Len() int { return len(pq) }
func (pq containerPriorityQueue) Less(i, j int) bool {
k1 := pq[i].value.highlowcontainer.getKeyAtIndex(pq[i].keyindex)
k2 := pq[j].value.highlowcontainer.getKeyAtIndex(pq[j].keyindex)
if k1 != k2 {
return k1 < k2
}
c1 := pq[i].value.highlowcontainer.getContainerAtIndex(pq[i].keyindex)
c2 := pq[j].value.highlowcontainer.getContainerAtIndex(pq[j].keyindex)
return c1.getCardinality() > c2.getCardinality()
}
func (pq containerPriorityQueue) Swap(i, j int) {
pq[i], pq[j] = pq[j], pq[i]
pq[i].index = i
pq[j].index = j
}
func (pq *containerPriorityQueue) Push(x interface{}) {
n := len(*pq)
item := x.(*containeritem)
item.index = n
*pq = append(*pq, item)
}
func (pq *containerPriorityQueue) Pop() interface{} {
old := *pq
n := len(old)
item := old[n-1]
item.index = -1 // for safety
*pq = old[0 : n-1]
return item
}
//func (pq *containerPriorityQueue) update(item *containeritem, value *Bitmap, keyindex int) {
// item.value = value
// item.keyindex = keyindex
// heap.Fix(pq, item.index)
//}

1578
vendor/github.com/RoaringBitmap/roaring/roaring.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

757
vendor/github.com/RoaringBitmap/roaring/roaringarray.go generated vendored Normal file
View File

@@ -0,0 +1,757 @@
package roaring
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"github.com/RoaringBitmap/roaring/internal"
)
type container interface {
addOffset(uint16) []container
clone() container
and(container) container
andCardinality(container) int
iand(container) container // i stands for inplace
andNot(container) container
iandNot(container) container // i stands for inplace
isEmpty() bool
getCardinality() int
// rank returns the number of integers that are
// smaller or equal to x. rank(infinity) would be getCardinality().
rank(uint16) int
iadd(x uint16) bool // inplace, returns true if x was new.
iaddReturnMinimized(uint16) container // may change return type to minimize storage.
//addRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
iaddRange(start, endx int) container // i stands for inplace, range is [firstOfRange,endx)
iremove(x uint16) bool // inplace, returns true if x was present.
iremoveReturnMinimized(uint16) container // may change return type to minimize storage.
not(start, final int) container // range is [firstOfRange,lastOfRange)
inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
xor(r container) container
getShortIterator() shortPeekable
iterate(cb func(x uint16) bool) bool
getReverseIterator() shortIterable
getManyIterator() manyIterable
contains(i uint16) bool
maximum() uint16
minimum() uint16
// equals is now logical equals; it does not require the
// same underlying container types, but compares across
// any of the implementations.
equals(r container) bool
fillLeastSignificant16bits(array []uint32, i int, mask uint32) int
or(r container) container
orCardinality(r container) int
isFull() bool
ior(r container) container // i stands for inplace
intersects(r container) bool // whether the two containers intersect
lazyOR(r container) container
lazyIOR(r container) container
getSizeInBytes() int
//removeRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
selectInt(x uint16) int // selectInt returns the xth integer in the container
serializedSizeInBytes() int
writeTo(io.Writer) (int, error)
numberOfRuns() int
toEfficientContainer() container
String() string
containerType() contype
}
type contype uint8
const (
bitmapContype contype = iota
arrayContype
run16Contype
run32Contype
)
// careful: range is [firstOfRange,lastOfRange]
func rangeOfOnes(start, last int) container {
if start > MaxUint16 {
panic("rangeOfOnes called with start > MaxUint16")
}
if last > MaxUint16 {
panic("rangeOfOnes called with last > MaxUint16")
}
if start < 0 {
panic("rangeOfOnes called with start < 0")
}
if last < 0 {
panic("rangeOfOnes called with last < 0")
}
return newRunContainer16Range(uint16(start), uint16(last))
}
type roaringArray struct {
keys []uint16
containers []container `msg:"-"` // don't try to serialize directly.
needCopyOnWrite []bool
copyOnWrite bool
}
func newRoaringArray() *roaringArray {
return &roaringArray{}
}
// runOptimize compresses the element containers to minimize space consumed.
// Q: how does this interact with copyOnWrite and needCopyOnWrite?
// A: since we aren't changing the logical content, just the representation,
// we don't bother to check the needCopyOnWrite bits. We replace
// (possibly all) elements of ra.containers in-place with space
// optimized versions.
func (ra *roaringArray) runOptimize() {
for i := range ra.containers {
ra.containers[i] = ra.containers[i].toEfficientContainer()
}
}
func (ra *roaringArray) appendContainer(key uint16, value container, mustCopyOnWrite bool) {
ra.keys = append(ra.keys, key)
ra.containers = append(ra.containers, value)
ra.needCopyOnWrite = append(ra.needCopyOnWrite, mustCopyOnWrite)
}
func (ra *roaringArray) appendWithoutCopy(sa roaringArray, startingindex int) {
mustCopyOnWrite := sa.needCopyOnWrite[startingindex]
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], mustCopyOnWrite)
}
func (ra *roaringArray) appendCopy(sa roaringArray, startingindex int) {
// cow only if the two request it, or if we already have a lightweight copy
copyonwrite := (ra.copyOnWrite && sa.copyOnWrite) || sa.needsCopyOnWrite(startingindex)
if !copyonwrite {
// since there is no copy-on-write, we need to clone the container (this is important)
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex].clone(), copyonwrite)
} else {
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], copyonwrite)
if !sa.needsCopyOnWrite(startingindex) {
sa.setNeedsCopyOnWrite(startingindex)
}
}
}
func (ra *roaringArray) appendWithoutCopyMany(sa roaringArray, startingindex, end int) {
for i := startingindex; i < end; i++ {
ra.appendWithoutCopy(sa, i)
}
}
func (ra *roaringArray) appendCopyMany(sa roaringArray, startingindex, end int) {
for i := startingindex; i < end; i++ {
ra.appendCopy(sa, i)
}
}
func (ra *roaringArray) appendCopiesUntil(sa roaringArray, stoppingKey uint16) {
// cow only if the two request it, or if we already have a lightweight copy
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
for i := 0; i < sa.size(); i++ {
if sa.keys[i] >= stoppingKey {
break
}
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
if thiscopyonewrite {
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
if !sa.needsCopyOnWrite(i) {
sa.setNeedsCopyOnWrite(i)
}
} else {
// since there is no copy-on-write, we need to clone the container (this is important)
ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
}
}
}
func (ra *roaringArray) appendCopiesAfter(sa roaringArray, beforeStart uint16) {
// cow only if the two request it, or if we already have a lightweight copy
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
startLocation := sa.getIndex(beforeStart)
if startLocation >= 0 {
startLocation++
} else {
startLocation = -startLocation - 1
}
for i := startLocation; i < sa.size(); i++ {
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
if thiscopyonewrite {
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
if !sa.needsCopyOnWrite(i) {
sa.setNeedsCopyOnWrite(i)
}
} else {
// since there is no copy-on-write, we need to clone the container (this is important)
ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
}
}
}
func (ra *roaringArray) removeIndexRange(begin, end int) {
if end <= begin {
return
}
r := end - begin
copy(ra.keys[begin:], ra.keys[end:])
copy(ra.containers[begin:], ra.containers[end:])
copy(ra.needCopyOnWrite[begin:], ra.needCopyOnWrite[end:])
ra.resize(len(ra.keys) - r)
}
func (ra *roaringArray) resize(newsize int) {
for k := newsize; k < len(ra.containers); k++ {
ra.containers[k] = nil
}
ra.keys = ra.keys[:newsize]
ra.containers = ra.containers[:newsize]
ra.needCopyOnWrite = ra.needCopyOnWrite[:newsize]
}
func (ra *roaringArray) clear() {
ra.resize(0)
ra.copyOnWrite = false
}
func (ra *roaringArray) clone() *roaringArray {
sa := roaringArray{}
sa.copyOnWrite = ra.copyOnWrite
// this is where copyOnWrite is used.
if ra.copyOnWrite {
sa.keys = make([]uint16, len(ra.keys))
copy(sa.keys, ra.keys)
sa.containers = make([]container, len(ra.containers))
copy(sa.containers, ra.containers)
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
ra.markAllAsNeedingCopyOnWrite()
sa.markAllAsNeedingCopyOnWrite()
// sa.needCopyOnWrite is shared
} else {
// make a full copy
sa.keys = make([]uint16, len(ra.keys))
copy(sa.keys, ra.keys)
sa.containers = make([]container, len(ra.containers))
for i := range sa.containers {
sa.containers[i] = ra.containers[i].clone()
}
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
}
return &sa
}
// clone all containers which have needCopyOnWrite set to true
// This can be used to make sure it is safe to munmap a []byte
// that the roaring array may still have a reference to.
func (ra *roaringArray) cloneCopyOnWriteContainers() {
for i, needCopyOnWrite := range ra.needCopyOnWrite {
if needCopyOnWrite {
ra.containers[i] = ra.containers[i].clone()
ra.needCopyOnWrite[i] = false
}
}
}
// unused function:
//func (ra *roaringArray) containsKey(x uint16) bool {
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
//}
func (ra *roaringArray) getContainer(x uint16) container {
i := ra.binarySearch(0, int64(len(ra.keys)), x)
if i < 0 {
return nil
}
return ra.containers[i]
}
func (ra *roaringArray) getContainerAtIndex(i int) container {
return ra.containers[i]
}
func (ra *roaringArray) getFastContainerAtIndex(i int, needsWriteable bool) container {
c := ra.getContainerAtIndex(i)
switch t := c.(type) {
case *arrayContainer:
c = t.toBitmapContainer()
case *runContainer16:
if !t.isFull() {
c = t.toBitmapContainer()
}
case *bitmapContainer:
if needsWriteable && ra.needCopyOnWrite[i] {
c = ra.containers[i].clone()
}
}
return c
}
// getUnionedWritableContainer switches behavior for in-place Or
// depending on whether the container requires a copy on write.
// If it does using the non-inplace or() method leads to fewer allocations.
func (ra *roaringArray) getUnionedWritableContainer(pos int, other container) container {
if ra.needCopyOnWrite[pos] {
return ra.getContainerAtIndex(pos).or(other)
}
return ra.getContainerAtIndex(pos).ior(other)
}
func (ra *roaringArray) getWritableContainerAtIndex(i int) container {
if ra.needCopyOnWrite[i] {
ra.containers[i] = ra.containers[i].clone()
ra.needCopyOnWrite[i] = false
}
return ra.containers[i]
}
func (ra *roaringArray) getIndex(x uint16) int {
// before the binary search, we optimize for frequent cases
size := len(ra.keys)
if (size == 0) || (ra.keys[size-1] == x) {
return size - 1
}
return ra.binarySearch(0, int64(size), x)
}
func (ra *roaringArray) getKeyAtIndex(i int) uint16 {
return ra.keys[i]
}
func (ra *roaringArray) insertNewKeyValueAt(i int, key uint16, value container) {
ra.keys = append(ra.keys, 0)
ra.containers = append(ra.containers, nil)
copy(ra.keys[i+1:], ra.keys[i:])
copy(ra.containers[i+1:], ra.containers[i:])
ra.keys[i] = key
ra.containers[i] = value
ra.needCopyOnWrite = append(ra.needCopyOnWrite, false)
copy(ra.needCopyOnWrite[i+1:], ra.needCopyOnWrite[i:])
ra.needCopyOnWrite[i] = false
}
func (ra *roaringArray) remove(key uint16) bool {
i := ra.binarySearch(0, int64(len(ra.keys)), key)
if i >= 0 { // key found
ra.removeAtIndex(i)
return true
}
return false
}
func (ra *roaringArray) removeAtIndex(i int) {
copy(ra.keys[i:], ra.keys[i+1:])
copy(ra.containers[i:], ra.containers[i+1:])
copy(ra.needCopyOnWrite[i:], ra.needCopyOnWrite[i+1:])
ra.resize(len(ra.keys) - 1)
}
func (ra *roaringArray) setContainerAtIndex(i int, c container) {
ra.containers[i] = c
}
func (ra *roaringArray) replaceKeyAndContainerAtIndex(i int, key uint16, c container, mustCopyOnWrite bool) {
ra.keys[i] = key
ra.containers[i] = c
ra.needCopyOnWrite[i] = mustCopyOnWrite
}
func (ra *roaringArray) size() int {
return len(ra.keys)
}
func (ra *roaringArray) binarySearch(begin, end int64, ikey uint16) int {
low := begin
high := end - 1
for low+16 <= high {
middleIndex := low + (high-low)/2 // avoid overflow
middleValue := ra.keys[middleIndex]
if middleValue < ikey {
low = middleIndex + 1
} else if middleValue > ikey {
high = middleIndex - 1
} else {
return int(middleIndex)
}
}
for ; low <= high; low++ {
val := ra.keys[low]
if val >= ikey {
if val == ikey {
return int(low)
}
break
}
}
return -int(low + 1)
}
func (ra *roaringArray) equals(o interface{}) bool {
srb, ok := o.(roaringArray)
if ok {
if srb.size() != ra.size() {
return false
}
for i, k := range ra.keys {
if k != srb.keys[i] {
return false
}
}
for i, c := range ra.containers {
if !c.equals(srb.containers[i]) {
return false
}
}
return true
}
return false
}
func (ra *roaringArray) headerSize() uint64 {
size := uint64(len(ra.keys))
if ra.hasRunCompression() {
if size < noOffsetThreshold { // for small bitmaps, we omit the offsets
return 4 + (size+7)/8 + 4*size
}
return 4 + (size+7)/8 + 8*size // - 4 because we pack the size with the cookie
}
return 4 + 4 + 8*size
}
// should be dirt cheap
func (ra *roaringArray) serializedSizeInBytes() uint64 {
answer := ra.headerSize()
for _, c := range ra.containers {
answer += uint64(c.serializedSizeInBytes())
}
return answer
}
//
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
//
func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
hasRun := ra.hasRunCompression()
isRunSizeInBytes := 0
cookieSize := 8
if hasRun {
cookieSize = 4
isRunSizeInBytes = (len(ra.keys) + 7) / 8
}
descriptiveHeaderSize := 4 * len(ra.keys)
preambleSize := cookieSize + isRunSizeInBytes + descriptiveHeaderSize
buf := make([]byte, preambleSize+4*len(ra.keys))
nw := 0
if hasRun {
binary.LittleEndian.PutUint16(buf[0:], uint16(serialCookie))
nw += 2
binary.LittleEndian.PutUint16(buf[2:], uint16(len(ra.keys)-1))
nw += 2
// compute isRun bitmap without temporary allocation
var runbitmapslice = buf[nw : nw+isRunSizeInBytes]
for i, c := range ra.containers {
switch c.(type) {
case *runContainer16:
runbitmapslice[i/8] |= 1 << (uint(i) % 8)
}
}
nw += isRunSizeInBytes
} else {
binary.LittleEndian.PutUint32(buf[0:], uint32(serialCookieNoRunContainer))
nw += 4
binary.LittleEndian.PutUint32(buf[4:], uint32(len(ra.keys)))
nw += 4
}
// descriptive header
for i, key := range ra.keys {
binary.LittleEndian.PutUint16(buf[nw:], key)
nw += 2
c := ra.containers[i]
binary.LittleEndian.PutUint16(buf[nw:], uint16(c.getCardinality()-1))
nw += 2
}
startOffset := int64(preambleSize + 4*len(ra.keys))
if !hasRun || (len(ra.keys) >= noOffsetThreshold) {
// offset header
for _, c := range ra.containers {
binary.LittleEndian.PutUint32(buf[nw:], uint32(startOffset))
nw += 4
switch rc := c.(type) {
case *runContainer16:
startOffset += 2 + int64(len(rc.iv))*4
default:
startOffset += int64(getSizeInBytesFromCardinality(c.getCardinality()))
}
}
}
written, err := w.Write(buf[:nw])
if err != nil {
return n, err
}
n += int64(written)
for _, c := range ra.containers {
written, err := c.writeTo(w)
if err != nil {
return n, err
}
n += int64(written)
}
return n, nil
}
//
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
//
func (ra *roaringArray) toBytes() ([]byte, error) {
var buf bytes.Buffer
_, err := ra.writeTo(&buf)
return buf.Bytes(), err
}
func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte) (int64, error) {
var cookie uint32
var err error
if len(cookieHeader) > 0 && len(cookieHeader) != 4 {
return int64(len(cookieHeader)), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: incorrect size of cookie header")
}
if len(cookieHeader) == 4 {
cookie = binary.LittleEndian.Uint32(cookieHeader)
} else {
cookie, err = stream.ReadUInt32()
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
}
}
var size uint32
var isRunBitmap []byte
if cookie&0x0000FFFF == serialCookie {
size = uint32(cookie>>16 + 1)
// create is-run-container bitmap
isRunBitmapSize := (int(size) + 7) / 8
isRunBitmap, err = stream.Next(isRunBitmapSize)
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err)
}
} else if cookie == serialCookieNoRunContainer {
size, err = stream.ReadUInt32()
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err)
}
} else {
return stream.GetReadBytes(), fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
}
if size > (1 << 16) {
return stream.GetReadBytes(), fmt.Errorf("it is logically impossible to have more than (1<<16) containers")
}
// descriptive header
buf, err := stream.Next(2 * 2 * int(size))
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err)
}
keycard := byteSliceAsUint16Slice(buf)
if isRunBitmap == nil || size >= noOffsetThreshold {
if err := stream.SkipBytes(int(size) * 4); err != nil {
return stream.GetReadBytes(), fmt.Errorf("failed to skip bytes: %s", err)
}
}
// Allocate slices upfront as number of containers is known
if cap(ra.containers) >= int(size) {
ra.containers = ra.containers[:size]
} else {
ra.containers = make([]container, size)
}
if cap(ra.keys) >= int(size) {
ra.keys = ra.keys[:size]
} else {
ra.keys = make([]uint16, size)
}
if cap(ra.needCopyOnWrite) >= int(size) {
ra.needCopyOnWrite = ra.needCopyOnWrite[:size]
} else {
ra.needCopyOnWrite = make([]bool, size)
}
for i := uint32(0); i < size; i++ {
key := keycard[2*i]
card := int(keycard[2*i+1]) + 1
ra.keys[i] = key
ra.needCopyOnWrite[i] = true
if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
// run container
nr, err := stream.ReadUInt16()
if err != nil {
return 0, fmt.Errorf("failed to read runtime container size: %s", err)
}
buf, err := stream.Next(int(nr) * 4)
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err)
}
nb := runContainer16{
iv: byteSliceAsInterval16Slice(buf),
}
ra.containers[i] = &nb
} else if card > arrayDefaultMaxSize {
// bitmap container
buf, err := stream.Next(arrayDefaultMaxSize * 2)
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err)
}
nb := bitmapContainer{
cardinality: card,
bitmap: byteSliceAsUint64Slice(buf),
}
ra.containers[i] = &nb
} else {
// array container
buf, err := stream.Next(card * 2)
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("failed to read array container: %s", err)
}
nb := arrayContainer{
byteSliceAsUint16Slice(buf),
}
ra.containers[i] = &nb
}
}
return stream.GetReadBytes(), nil
}
func (ra *roaringArray) hasRunCompression() bool {
for _, c := range ra.containers {
switch c.(type) {
case *runContainer16:
return true
}
}
return false
}
func (ra *roaringArray) advanceUntil(min uint16, pos int) int {
lower := pos + 1
if lower >= len(ra.keys) || ra.keys[lower] >= min {
return lower
}
spansize := 1
for lower+spansize < len(ra.keys) && ra.keys[lower+spansize] < min {
spansize *= 2
}
var upper int
if lower+spansize < len(ra.keys) {
upper = lower + spansize
} else {
upper = len(ra.keys) - 1
}
if ra.keys[upper] == min {
return upper
}
if ra.keys[upper] < min {
// means the array has no item >= min; pos = array.length
return len(ra.keys)
}
// we know that the next-smallest span was too small
lower += (spansize >> 1)
mid := 0
for lower+1 != upper {
mid = (lower + upper) >> 1
if ra.keys[mid] == min {
return mid
} else if ra.keys[mid] < min {
lower = mid
} else {
upper = mid
}
}
return upper
}
func (ra *roaringArray) markAllAsNeedingCopyOnWrite() {
for i := range ra.needCopyOnWrite {
ra.needCopyOnWrite[i] = true
}
}
func (ra *roaringArray) needsCopyOnWrite(i int) bool {
return ra.needCopyOnWrite[i]
}
func (ra *roaringArray) setNeedsCopyOnWrite(i int) {
ra.needCopyOnWrite[i] = true
}

2604
vendor/github.com/RoaringBitmap/roaring/runcontainer.go generated vendored Normal file

File diff suppressed because it is too large

View File

@@ -0,0 +1,19 @@
package roaring
import (
"encoding/binary"
"io"
)
// writeTo for runContainer16 follows this
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
//
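// As a worked layout example (illustrative only, derived from the code
// below): a single interval {start: 5, length: 2} serializes to the six
// little-endian bytes
//
//	01 00  05 00  02 00
//
// that is, the 16-bit run count (1) followed by the start and length of each run.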
func (b *runContainer16) writeTo(stream io.Writer) (int, error) {
buf := make([]byte, 2+4*len(b.iv))
binary.LittleEndian.PutUint16(buf[0:], uint16(len(b.iv)))
for i, v := range b.iv {
binary.LittleEndian.PutUint16(buf[2+i*4:], v.start)
binary.LittleEndian.PutUint16(buf[2+2+i*4:], v.length)
}
return stream.Write(buf)
}

View File

@@ -0,0 +1,133 @@
// +build !amd64,!386,!arm,!arm64,!ppc64le,!mipsle,!mips64le,!mips64p32le,!wasm appengine
package roaring
import (
"encoding/binary"
"errors"
"io"
)
func (b *arrayContainer) writeTo(stream io.Writer) (int, error) {
buf := make([]byte, 2*len(b.content))
for i, v := range b.content {
base := i * 2
buf[base] = byte(v)
buf[base+1] = byte(v >> 8)
}
return stream.Write(buf)
}
func (b *arrayContainer) readFrom(stream io.Reader) (int, error) {
err := binary.Read(stream, binary.LittleEndian, b.content)
if err != nil {
return 0, err
}
return 2 * len(b.content), nil
}
func (b *bitmapContainer) writeTo(stream io.Writer) (int, error) {
if b.cardinality <= arrayDefaultMaxSize {
return 0, errors.New("refusing to write bitmap container with cardinality of array container")
}
// Write set
buf := make([]byte, 8*len(b.bitmap))
for i, v := range b.bitmap {
base := i * 8
buf[base] = byte(v)
buf[base+1] = byte(v >> 8)
buf[base+2] = byte(v >> 16)
buf[base+3] = byte(v >> 24)
buf[base+4] = byte(v >> 32)
buf[base+5] = byte(v >> 40)
buf[base+6] = byte(v >> 48)
buf[base+7] = byte(v >> 56)
}
return stream.Write(buf)
}
func (b *bitmapContainer) readFrom(stream io.Reader) (int, error) {
err := binary.Read(stream, binary.LittleEndian, b.bitmap)
if err != nil {
return 0, err
}
b.computeCardinality()
return 8 * len(b.bitmap), nil
}
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
by := make([]byte, len(bc.bitmap)*8)
for i := range bc.bitmap {
binary.LittleEndian.PutUint64(by[i*8:], bc.bitmap[i])
}
return by
}
func uint64SliceAsByteSlice(slice []uint64) []byte {
by := make([]byte, len(slice)*8)
for i, v := range slice {
binary.LittleEndian.PutUint64(by[i*8:], v)
}
return by
}
func uint16SliceAsByteSlice(slice []uint16) []byte {
by := make([]byte, len(slice)*2)
for i, v := range slice {
binary.LittleEndian.PutUint16(by[i*2:], v)
}
return by
}
func byteSliceAsUint16Slice(slice []byte) []uint16 {
if len(slice)%2 != 0 {
panic("Slice size should be divisible by 2")
}
b := make([]uint16, len(slice)/2)
for i := range b {
b[i] = binary.LittleEndian.Uint16(slice[2*i:])
}
return b
}
func byteSliceAsUint64Slice(slice []byte) []uint64 {
if len(slice)%8 != 0 {
panic("Slice size should be divisible by 8")
}
b := make([]uint64, len(slice)/8)
for i := range b {
b[i] = binary.LittleEndian.Uint64(slice[8*i:])
}
return b
}
// Converts a byte slice to an interval16 slice.
// The function assumes that the byte buffer is run container data
// encoded according to the Roaring Format Spec.
func byteSliceAsInterval16Slice(byteSlice []byte) []interval16 {
if len(byteSlice)%4 != 0 {
panic("Slice size should be divisible by 4")
}
intervalSlice := make([]interval16, len(byteSlice)/4)
for i := range intervalSlice {
intervalSlice[i] = interval16{
start: binary.LittleEndian.Uint16(byteSlice[i*4:]),
length: binary.LittleEndian.Uint16(byteSlice[i*4+2:]),
}
}
return intervalSlice
}

View File

@@ -0,0 +1,417 @@
// +build 386,!appengine amd64,!appengine arm,!appengine arm64,!appengine ppc64le,!appengine mipsle,!appengine mips64le,!appengine mips64p32le,!appengine wasm,!appengine
package roaring
import (
"encoding/binary"
"errors"
"io"
"reflect"
"runtime"
"unsafe"
)
func (ac *arrayContainer) writeTo(stream io.Writer) (int, error) {
buf := uint16SliceAsByteSlice(ac.content)
return stream.Write(buf)
}
func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) {
if bc.cardinality <= arrayDefaultMaxSize {
return 0, errors.New("refusing to write bitmap container with cardinality of array container")
}
buf := uint64SliceAsByteSlice(bc.bitmap)
return stream.Write(buf)
}
func uint64SliceAsByteSlice(slice []uint64) []byte {
// make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
// update its capacity and length
header.Len *= 8
header.Cap *= 8
// instantiate result and use KeepAlive so data isn't unmapped.
result := *(*[]byte)(unsafe.Pointer(&header))
runtime.KeepAlive(&slice)
// return it
return result
}
func uint16SliceAsByteSlice(slice []uint16) []byte {
// make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
// update its capacity and length
header.Len *= 2
header.Cap *= 2
// instantiate result and use KeepAlive so data isn't unmapped.
result := *(*[]byte)(unsafe.Pointer(&header))
runtime.KeepAlive(&slice)
// return it
return result
}
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
return uint64SliceAsByteSlice(bc.bitmap)
}
// Deserialization code follows
////
// These methods (byteSliceAsUint16Slice,...) do not make copies,
// they are pointer-based (unsafe). The caller is responsible for
// ensuring that the input slice does not get garbage collected, deleted
// or modified while holding the returned slice.
////
func byteSliceAsUint16Slice(slice []byte) (result []uint16) { // here we create a new slice holder
if len(slice)%2 != 0 {
panic("Slice size should be divisible by 2")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / 2
rHeader.Cap = bHeader.Cap / 2
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
}
func byteSliceAsUint64Slice(slice []byte) (result []uint64) {
if len(slice)%8 != 0 {
panic("Slice size should be divisible by 8")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / 8
rHeader.Cap = bHeader.Cap / 8
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
}
func byteSliceAsInterval16Slice(slice []byte) (result []interval16) {
if len(slice)%4 != 0 {
panic("Slice size should be divisible by 4")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / 4
rHeader.Cap = bHeader.Cap / 4
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
}
// FrozenView creates a bitmap from its serialized version stored in buf.
// It uses CRoaring's frozen bitmap format.
//
// The format specification is available here:
// https://github.com/RoaringBitmap/CRoaring/blob/2c867e9f9c9e2a3a7032791f94c4c7ae3013f6e0/src/roaring.c#L2756-L2783
//
// The provided byte array (buf) is expected to be a constant.
// The function makes a best-effort attempt not to copy data.
// Only little endian is supported; the function returns an error if it
// detects a big-endian serialized file.
// You should take care not to modify buf, as doing so will likely result in
// unexpected program behavior.
// If said buffer comes from a memory map, it's advisable to give it read
// only permissions, either at creation or by calling Mprotect from the
// golang.org/x/sys/unix package.
//
// Resulting bitmaps are effectively immutable in the following sense:
// a copy-on-write marker is used so that when you modify the resulting
// bitmap, copies of selected data (containers) are made.
// You should *not* change the copy-on-write status of the resulting
// bitmaps (SetCopyOnWrite).
//
// If buf becomes unavailable, then a bitmap created with
// FrozenView would be effectively broken. Furthermore, any
// bitmap derived from this bitmap (e.g., via Or, And) might
// also be broken. Thus, before making buf unavailable, you should
// call CloneCopyOnWriteContainers on all such bitmaps.
//
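// A minimal usage sketch (an illustration, not part of the API surface; it
// assumes buf already holds frozen-format bytes, e.g. produced by Freeze or
// read from a read-only memory map):
//
//	view := roaring.NewBitmap()
//	if err := view.FrozenView(buf); err != nil {
//		// handle malformed or truncated input
//	}
//	_ = view.GetCardinality() // read-only style access; avoid SetCopyOnWrite
//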
func (rb *Bitmap) FrozenView(buf []byte) error {
return rb.highlowcontainer.frozenView(buf)
}
/* Verbatim specification from CRoaring.
*
* FROZEN SERIALIZATION FORMAT DESCRIPTION
*
* -- (beginning must be aligned by 32 bytes) --
* <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * num_bitset_containers]
* <run_data> rle16_t[total number of rle elements in all run containers]
* <array_data> uint16_t[total number of array elements in all array containers]
* <keys> uint16_t[num_containers]
* <counts> uint16_t[num_containers]
* <typecodes> uint8_t[num_containers]
* <header> uint32_t
*
* <header> is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits)
* and the number of containers (17 bits).
*
* <counts> stores number of elements for every container.
* Its meaning depends on container type.
* For array and bitset containers, this value is the container cardinality minus one.
* For run container, it is the number of rle_t elements (n_runs).
*
* <bitset_data>,<array_data>,<run_data> are flat arrays of elements of
* all containers of respective type.
*
* <*_data> and <keys> are kept close together because they are not accessed
* during deserialization. This may reduce IO in case of large mmapped bitmaps.
* All members have their native alignments during deserialization except <header>,
* which is not guaranteed to be aligned by 4 bytes.
*/
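// Illustrative decoding of the 4-byte trailer described above (a sketch that
// mirrors frozenView below; buf is assumed to hold a complete frozen image):
//
//	header := binary.LittleEndian.Uint32(buf[len(buf)-4:])
//	cookie := header & 0x7fff  // low 15 bits: must equal FROZEN_COOKIE
//	nCont := int(header >> 15) // remaining 17 bits: number of containers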
const FROZEN_COOKIE = 13766
var (
FrozenBitmapInvalidCookie = errors.New("header does not contain the FROZEN_COOKIE")
FrozenBitmapBigEndian = errors.New("loading big endian frozen bitmaps is not supported")
FrozenBitmapIncomplete = errors.New("input buffer too small to contain a frozen bitmap")
FrozenBitmapOverpopulated = errors.New("too many containers")
FrozenBitmapUnexpectedData = errors.New("spurious data in input")
FrozenBitmapInvalidTypecode = errors.New("unrecognized typecode")
FrozenBitmapBufferTooSmall = errors.New("buffer too small")
)
func (ra *roaringArray) frozenView(buf []byte) error {
if len(buf) < 4 {
return FrozenBitmapIncomplete
}
headerBE := binary.BigEndian.Uint32(buf[len(buf)-4:])
if headerBE & 0x7fff == FROZEN_COOKIE {
return FrozenBitmapBigEndian
}
header := binary.LittleEndian.Uint32(buf[len(buf)-4:])
buf = buf[:len(buf)-4]
if header & 0x7fff != FROZEN_COOKIE {
return FrozenBitmapInvalidCookie
}
nCont := int(header >> 15)
if nCont > (1 << 16) {
return FrozenBitmapOverpopulated
}
// 1 byte per type, 2 bytes per key, 2 bytes per count.
if len(buf) < 5*nCont {
return FrozenBitmapIncomplete
}
types := buf[len(buf)-nCont:]
buf = buf[:len(buf)-nCont]
counts := byteSliceAsUint16Slice(buf[len(buf)-2*nCont:])
buf = buf[:len(buf)-2*nCont]
keys := byteSliceAsUint16Slice(buf[len(buf)-2*nCont:])
buf = buf[:len(buf)-2*nCont]
nBitmap, nArray, nRun := uint64(0), uint64(0), uint64(0)
nArrayEl, nRunEl := uint64(0), uint64(0)
for i, t := range types {
switch (t) {
case 1:
nBitmap++
case 2:
nArray++
nArrayEl += uint64(counts[i])+1
case 3:
nRun++
nRunEl += uint64(counts[i])
default:
return FrozenBitmapInvalidTypecode
}
}
if uint64(len(buf)) < (1 << 13)*nBitmap + 4*nRunEl + 2*nArrayEl {
return FrozenBitmapIncomplete
}
bitsetsArena := byteSliceAsUint64Slice(buf[:(1 << 13)*nBitmap])
buf = buf[(1 << 13)*nBitmap:]
runsArena := byteSliceAsInterval16Slice(buf[:4*nRunEl])
buf = buf[4*nRunEl:]
arraysArena := byteSliceAsUint16Slice(buf[:2*nArrayEl])
buf = buf[2*nArrayEl:]
if len(buf) != 0 {
return FrozenBitmapUnexpectedData
}
// TODO: maybe arena_alloc all this.
containers := make([]container, nCont)
bitsets := make([]bitmapContainer, nBitmap)
arrays := make([]arrayContainer, nArray)
runs := make([]runContainer16, nRun)
needCOW := make([]bool, nCont)
iBitset, iArray, iRun := uint64(0), uint64(0), uint64(0)
for i, t := range types {
needCOW[i] = true
switch (t) {
case 1:
containers[i] = &bitsets[iBitset]
bitsets[iBitset].cardinality = int(counts[i])+1
bitsets[iBitset].bitmap = bitsetsArena[:1024]
bitsetsArena = bitsetsArena[1024:]
iBitset++
case 2:
containers[i] = &arrays[iArray]
sz := int(counts[i])+1
arrays[iArray].content = arraysArena[:sz]
arraysArena = arraysArena[sz:]
iArray++
case 3:
containers[i] = &runs[iRun]
runs[iRun].iv = runsArena[:counts[i]]
runsArena = runsArena[counts[i]:]
iRun++
}
}
// Not consuming the full input is a bug.
if iBitset != nBitmap || len(bitsetsArena) != 0 ||
iArray != nArray || len(arraysArena) != 0 ||
iRun != nRun || len(runsArena) != 0 {
panic("we missed something")
}
ra.keys = keys
ra.containers = containers
ra.needCopyOnWrite = needCOW
ra.copyOnWrite = true
return nil
}
func (bm *Bitmap) GetFrozenSizeInBytes() uint64 {
nBits, nArrayEl, nRunEl := uint64(0), uint64(0), uint64(0)
for _, c := range bm.highlowcontainer.containers {
switch v := c.(type) {
case *bitmapContainer:
nBits++
case *arrayContainer:
nArrayEl += uint64(len(v.content))
case *runContainer16:
nRunEl += uint64(len(v.iv))
}
}
return 4 + 5*uint64(len(bm.highlowcontainer.containers)) +
(nBits << 13) + 2*nArrayEl + 4*nRunEl
}
func (bm *Bitmap) Freeze() ([]byte, error) {
sz := bm.GetFrozenSizeInBytes()
buf := make([]byte, sz)
_, err := bm.FreezeTo(buf)
return buf, err
}
func (bm *Bitmap) FreezeTo(buf []byte) (int, error) {
containers := bm.highlowcontainer.containers
nCont := len(containers)
nBits, nArrayEl, nRunEl := 0, 0, 0
for _, c := range containers {
switch v := c.(type) {
case *bitmapContainer:
nBits++
case *arrayContainer:
nArrayEl += len(v.content)
case *runContainer16:
nRunEl += len(v.iv)
}
}
serialSize := 4 + 5*nCont + (1 << 13)*nBits + 4*nRunEl + 2*nArrayEl
if len(buf) < serialSize {
return 0, FrozenBitmapBufferTooSmall
}
bitsArena := byteSliceAsUint64Slice(buf[:(1 << 13)*nBits])
buf = buf[(1 << 13)*nBits:]
runsArena := byteSliceAsInterval16Slice(buf[:4*nRunEl])
buf = buf[4*nRunEl:]
arraysArena := byteSliceAsUint16Slice(buf[:2*nArrayEl])
buf = buf[2*nArrayEl:]
keys := byteSliceAsUint16Slice(buf[:2*nCont])
buf = buf[2*nCont:]
counts := byteSliceAsUint16Slice(buf[:2*nCont])
buf = buf[2*nCont:]
types := buf[:nCont]
buf = buf[nCont:]
header := uint32(FROZEN_COOKIE|(nCont << 15))
binary.LittleEndian.PutUint32(buf[:4], header)
copy(keys, bm.highlowcontainer.keys[:])
for i, c := range containers {
switch v := c.(type) {
case *bitmapContainer:
copy(bitsArena, v.bitmap)
bitsArena = bitsArena[1024:]
counts[i] = uint16(v.cardinality-1)
types[i] = 1
case *arrayContainer:
copy(arraysArena, v.content)
arraysArena = arraysArena[len(v.content):]
elems := len(v.content)
counts[i] = uint16(elems-1)
types[i] = 2
case *runContainer16:
copy(runsArena, v.iv)
runs := len(v.iv)
runsArena = runsArena[runs:]
counts[i] = uint16(runs)
types[i] = 3
}
}
return serialSize, nil
}

View File

@@ -0,0 +1,21 @@
// +build gofuzz
package roaring
import "bytes"
func FuzzSerializationStream(data []byte) int {
newrb := NewBitmap()
if _, err := newrb.ReadFrom(bytes.NewReader(data)); err != nil {
return 0
}
return 1
}
func FuzzSerializationBuffer(data []byte) int {
newrb := NewBitmap()
if _, err := newrb.FromBuffer(data); err != nil {
return 0
}
return 1
}

550
vendor/github.com/RoaringBitmap/roaring/setutil.go generated vendored Normal file
View File

@@ -0,0 +1,550 @@
package roaring
func equal(a, b []uint16) bool {
if len(a) != len(b) {
return false
}
for i := range a {
if a[i] != b[i] {
return false
}
}
return true
}
func difference(set1 []uint16, set2 []uint16, buffer []uint16) int {
if 0 == len(set2) {
buffer = buffer[:len(set1)]
for k := 0; k < len(set1); k++ {
buffer[k] = set1[k]
}
return len(set1)
}
if 0 == len(set1) {
return 0
}
pos := 0
k1 := 0
k2 := 0
buffer = buffer[:cap(buffer)]
s1 := set1[k1]
s2 := set2[k2]
for {
if s1 < s2 {
buffer[pos] = s1
pos++
k1++
if k1 >= len(set1) {
break
}
s1 = set1[k1]
} else if s1 == s2 {
k1++
k2++
if k1 >= len(set1) {
break
}
s1 = set1[k1]
if k2 >= len(set2) {
for ; k1 < len(set1); k1++ {
buffer[pos] = set1[k1]
pos++
}
break
}
s2 = set2[k2]
} else { // if (val1>val2)
k2++
if k2 >= len(set2) {
for ; k1 < len(set1); k1++ {
buffer[pos] = set1[k1]
pos++
}
break
}
s2 = set2[k2]
}
}
return pos
}
func exclusiveUnion2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
if 0 == len(set2) {
buffer = buffer[:len(set1)]
copy(buffer, set1[:])
return len(set1)
}
if 0 == len(set1) {
buffer = buffer[:len(set2)]
copy(buffer, set2[:])
return len(set2)
}
pos := 0
k1 := 0
k2 := 0
s1 := set1[k1]
s2 := set2[k2]
buffer = buffer[:cap(buffer)]
for {
if s1 < s2 {
buffer[pos] = s1
pos++
k1++
if k1 >= len(set1) {
for ; k2 < len(set2); k2++ {
buffer[pos] = set2[k2]
pos++
}
break
}
s1 = set1[k1]
} else if s1 == s2 {
k1++
k2++
if k1 >= len(set1) {
for ; k2 < len(set2); k2++ {
buffer[pos] = set2[k2]
pos++
}
break
}
if k2 >= len(set2) {
for ; k1 < len(set1); k1++ {
buffer[pos] = set1[k1]
pos++
}
break
}
s1 = set1[k1]
s2 = set2[k2]
} else { // if (val1>val2)
buffer[pos] = s2
pos++
k2++
if k2 >= len(set2) {
for ; k1 < len(set1); k1++ {
buffer[pos] = set1[k1]
pos++
}
break
}
s2 = set2[k2]
}
}
return pos
}
func union2by2Cardinality(set1 []uint16, set2 []uint16) int {
pos := 0
k1 := 0
k2 := 0
if 0 == len(set2) {
return len(set1)
}
if 0 == len(set1) {
return len(set2)
}
s1 := set1[k1]
s2 := set2[k2]
for {
if s1 < s2 {
pos++
k1++
if k1 >= len(set1) {
pos += len(set2) - k2
break
}
s1 = set1[k1]
} else if s1 == s2 {
pos++
k1++
k2++
if k1 >= len(set1) {
pos += len(set2) - k2
break
}
if k2 >= len(set2) {
pos += len(set1) - k1
break
}
s1 = set1[k1]
s2 = set2[k2]
} else { // if (set1[k1]>set2[k2])
pos++
k2++
if k2 >= len(set2) {
pos += len(set1) - k1
break
}
s2 = set2[k2]
}
}
return pos
}
func intersection2by2(
set1 []uint16,
set2 []uint16,
buffer []uint16) int {
if len(set1)*64 < len(set2) {
return onesidedgallopingintersect2by2(set1, set2, buffer)
} else if len(set2)*64 < len(set1) {
return onesidedgallopingintersect2by2(set2, set1, buffer)
} else {
return localintersect2by2(set1, set2, buffer)
}
}
func intersection2by2Cardinality(
set1 []uint16,
set2 []uint16) int {
if len(set1)*64 < len(set2) {
return onesidedgallopingintersect2by2Cardinality(set1, set2)
} else if len(set2)*64 < len(set1) {
return onesidedgallopingintersect2by2Cardinality(set2, set1)
} else {
return localintersect2by2Cardinality(set1, set2)
}
}
func intersects2by2(
set1 []uint16,
set2 []uint16) bool {
// could be optimized if one set is much larger than the other one
if (0 == len(set1)) || (0 == len(set2)) {
return false
}
k1 := 0
k2 := 0
s1 := set1[k1]
s2 := set2[k2]
mainwhile:
for {
if s2 < s1 {
for {
k2++
if k2 == len(set2) {
break mainwhile
}
s2 = set2[k2]
if s2 >= s1 {
break
}
}
}
if s1 < s2 {
for {
k1++
if k1 == len(set1) {
break mainwhile
}
s1 = set1[k1]
if s1 >= s2 {
break
}
}
} else {
// (set2[k2] == set1[k1])
return true
}
}
return false
}
func localintersect2by2(
set1 []uint16,
set2 []uint16,
buffer []uint16) int {
if (0 == len(set1)) || (0 == len(set2)) {
return 0
}
k1 := 0
k2 := 0
pos := 0
buffer = buffer[:cap(buffer)]
s1 := set1[k1]
s2 := set2[k2]
mainwhile:
for {
if s2 < s1 {
for {
k2++
if k2 == len(set2) {
break mainwhile
}
s2 = set2[k2]
if s2 >= s1 {
break
}
}
}
if s1 < s2 {
for {
k1++
if k1 == len(set1) {
break mainwhile
}
s1 = set1[k1]
if s1 >= s2 {
break
}
}
} else {
// (set2[k2] == set1[k1])
buffer[pos] = s1
pos++
k1++
if k1 == len(set1) {
break
}
s1 = set1[k1]
k2++
if k2 == len(set2) {
break
}
s2 = set2[k2]
}
}
return pos
}
func localintersect2by2Cardinality(
set1 []uint16,
set2 []uint16) int {
if (0 == len(set1)) || (0 == len(set2)) {
return 0
}
k1 := 0
k2 := 0
pos := 0
s1 := set1[k1]
s2 := set2[k2]
mainwhile:
for {
if s2 < s1 {
for {
k2++
if k2 == len(set2) {
break mainwhile
}
s2 = set2[k2]
if s2 >= s1 {
break
}
}
}
if s1 < s2 {
for {
k1++
if k1 == len(set1) {
break mainwhile
}
s1 = set1[k1]
if s1 >= s2 {
break
}
}
} else {
// (set2[k2] == set1[k1])
pos++
k1++
if k1 == len(set1) {
break
}
s1 = set1[k1]
k2++
if k2 == len(set2) {
break
}
s2 = set2[k2]
}
}
return pos
}
func advanceUntil(
array []uint16,
pos int,
length int,
min uint16) int {
lower := pos + 1
if lower >= length || array[lower] >= min {
return lower
}
spansize := 1
for lower+spansize < length && array[lower+spansize] < min {
spansize *= 2
}
var upper int
if lower+spansize < length {
upper = lower + spansize
} else {
upper = length - 1
}
if array[upper] == min {
return upper
}
if array[upper] < min {
// means the array has no item >= min; pos = array.length
return length
}
// we know that the next-smallest span was too small
lower += (spansize >> 1)
mid := 0
for lower+1 != upper {
mid = (lower + upper) >> 1
if array[mid] == min {
return mid
} else if array[mid] < min {
lower = mid
} else {
upper = mid
}
}
return upper
}
func onesidedgallopingintersect2by2(
smallset []uint16,
largeset []uint16,
buffer []uint16) int {
if 0 == len(smallset) {
return 0
}
buffer = buffer[:cap(buffer)]
k1 := 0
k2 := 0
pos := 0
s1 := largeset[k1]
s2 := smallset[k2]
mainwhile:
for {
if s1 < s2 {
k1 = advanceUntil(largeset, k1, len(largeset), s2)
if k1 == len(largeset) {
break mainwhile
}
s1 = largeset[k1]
}
if s2 < s1 {
k2++
if k2 == len(smallset) {
break mainwhile
}
s2 = smallset[k2]
} else {
buffer[pos] = s2
pos++
k2++
if k2 == len(smallset) {
break
}
s2 = smallset[k2]
k1 = advanceUntil(largeset, k1, len(largeset), s2)
if k1 == len(largeset) {
break mainwhile
}
s1 = largeset[k1]
}
}
return pos
}
func onesidedgallopingintersect2by2Cardinality(
smallset []uint16,
largeset []uint16) int {
if 0 == len(smallset) {
return 0
}
k1 := 0
k2 := 0
pos := 0
s1 := largeset[k1]
s2 := smallset[k2]
mainwhile:
for {
if s1 < s2 {
k1 = advanceUntil(largeset, k1, len(largeset), s2)
if k1 == len(largeset) {
break mainwhile
}
s1 = largeset[k1]
}
if s2 < s1 {
k2++
if k2 == len(smallset) {
break mainwhile
}
s2 = smallset[k2]
} else {
pos++
k2++
if k2 == len(smallset) {
break
}
s2 = smallset[k2]
k1 = advanceUntil(largeset, k1, len(largeset), s2)
if k1 == len(largeset) {
break mainwhile
}
s1 = largeset[k1]
}
}
return pos
}
func binarySearch(array []uint16, ikey uint16) int {
low := 0
high := len(array) - 1
for low+16 <= high {
middleIndex := int(uint32(low+high) >> 1)
middleValue := array[middleIndex]
if middleValue < ikey {
low = middleIndex + 1
} else if middleValue > ikey {
high = middleIndex - 1
} else {
return middleIndex
}
}
for ; low <= high; low++ {
val := array[low]
if val >= ikey {
if val == ikey {
return low
}
break
}
}
return -(low + 1)
}

View File

@@ -0,0 +1,6 @@
// +build arm64,!gccgo,!appengine
package roaring
//go:noescape
func union2by2(set1 []uint16, set2 []uint16, buffer []uint16) (size int)

132
vendor/github.com/RoaringBitmap/roaring/setutil_arm64.s generated vendored Normal file
View File

@@ -0,0 +1,132 @@
// +build arm64,!gccgo,!appengine
#include "textflag.h"
// This implements union2by2 using golang's version of arm64 assembly
// The algorithm is very similar to the generic one,
// but makes better use of arm64 features so is notably faster.
// The basic algorithm structure is as follows:
// 1. If either set is empty, copy the other set into the buffer and return the length
// 2. Otherwise, load the first element of each set into a variable (s1 and s2).
// 3. a. Compare the values of s1 and s2.
// b. Add the smaller one to the buffer.
// c. Perform a bounds check before incrementing.
// If one set is finished, copy the rest of the other set over.
// d. Update s1 and/or s2 to the next value, then continue the loop.
//
// Beyond the algorithm itself, this code makes use of several arm64 features:
// Condition Codes:
// arm64's CMP operation sets 4 bits that can be used for branching,
// rather than just true or false.
// As a consequence, a single comparison gives enough information to distinguish the three cases.
//
// Post-increment pointers after load/store:
// Instructions like `MOVHU.P 2(R0), R6`
// increment the register by a specified amount, in this example 2.
// Because uint16's are exactly 2 bytes and the length of the slices
// is part of the slice header,
// there is no need to separately track the index into the slice.
// Instead, the code can calculate the final read value and compare against that,
// using the post-increment reads to move the pointers along.
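//
// For example, the paired post-increment load and store used throughout the
// body below (a worked illustration of this addressing mode):
//
//	MOVHU.P 2(R0), R6  // load the uint16 at R0 into R6, then advance R0 by 2
//	MOVHU.P R6, 2(R2)  // store R6 at the address in R2, then advance R2 by 2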
//
// TODO: CALL out to memmove once the list is exhausted.
// Right now it moves the necessary shorts so that the remaining count
// is a multiple of 4 and then copies 64 bits at a time.
TEXT ·union2by2(SB), NOSPLIT, $0-80
// R0, R1, and R2 for the pointers to the three slices
MOVD set1+0(FP), R0
MOVD set2+24(FP), R1
MOVD buffer+48(FP), R2
//R3 and R4 will be the values at which we will have finished reading set1 and set2.
// R3 should be R0 + 2 * set1_len+8(FP)
MOVD set1_len+8(FP), R3
MOVD set2_len+32(FP), R4
ADD R3<<1, R0, R3
ADD R4<<1, R1, R4
// Rather than counting the number of elements added separately,
// save the starting address of buffer (in R5).
MOVD buffer+48(FP), R5
// set1 is empty, just flush set2
CMP R0, R3
BEQ flush_right
// set2 is empty, just flush set1
CMP R1, R4
BEQ flush_left
// R6, R7 are the working space for s1 and s2
MOVD ZR, R6
MOVD ZR, R7
MOVHU.P 2(R0), R6
MOVHU.P 2(R1), R7
loop:
CMP R6, R7
BEQ pop_both // R6 == R7
BLS pop_right // R6 > R7
//pop_left: // R6 < R7
MOVHU.P R6, 2(R2)
CMP R0, R3
BEQ pop_then_flush_right
MOVHU.P 2(R0), R6
JMP loop
pop_both:
MOVHU.P R6, 2(R2) //could also use R7, since they are equal
CMP R0, R3
BEQ flush_right
CMP R1, R4
BEQ flush_left
MOVHU.P 2(R0), R6
MOVHU.P 2(R1), R7
JMP loop
pop_right:
MOVHU.P R7, 2(R2)
CMP R1, R4
BEQ pop_then_flush_left
MOVHU.P 2(R1), R7
JMP loop
pop_then_flush_right:
MOVHU.P R7, 2(R2)
flush_right:
MOVD R1, R0
MOVD R4, R3
JMP flush_left
pop_then_flush_left:
MOVHU.P R6, 2(R2)
flush_left:
CMP R0, R3
BEQ return
//figure out how many bytes to slough off. Must be a multiple of two
SUB R0, R3, R4
ANDS $6, R4
BEQ long_flush //handles the 0 mod 8 case
SUBS $4, R4, R4 // since possible values are 2, 4, 6, this splits evenly
BLT pop_single // exactly the 2 case
MOVW.P 4(R0), R6
MOVW.P R6, 4(R2)
BEQ long_flush // we're now aligned by 64 bits, as R4==4, otherwise 2 more
pop_single:
MOVHU.P 2(R0), R6
MOVHU.P R6, 2(R2)
long_flush:
// at this point we know R3 - R0 is a multiple of 8.
CMP R0, R3
BEQ return
MOVD.P 8(R0), R6
MOVD.P R6, 8(R2)
JMP long_flush
return:
// number of shorts written is (R5 - R2) >> 1
SUB R5, R2
LSR $1, R2, R2
MOVD R2, size+72(FP)
RET

View File

@@ -0,0 +1,63 @@
// +build !arm64 gccgo appengine
package roaring
func union2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
pos := 0
k1 := 0
k2 := 0
if 0 == len(set2) {
buffer = buffer[:len(set1)]
copy(buffer, set1[:])
return len(set1)
}
if 0 == len(set1) {
buffer = buffer[:len(set2)]
copy(buffer, set2[:])
return len(set2)
}
s1 := set1[k1]
s2 := set2[k2]
buffer = buffer[:cap(buffer)]
for {
if s1 < s2 {
buffer[pos] = s1
pos++
k1++
if k1 >= len(set1) {
copy(buffer[pos:], set2[k2:])
pos += len(set2) - k2
break
}
s1 = set1[k1]
} else if s1 == s2 {
buffer[pos] = s1
pos++
k1++
k2++
if k1 >= len(set1) {
copy(buffer[pos:], set2[k2:])
pos += len(set2) - k2
break
}
if k2 >= len(set2) {
copy(buffer[pos:], set1[k1:])
pos += len(set1) - k1
break
}
s1 = set1[k1]
s2 = set2[k2]
} else { // if (set1[k1]>set2[k2])
buffer[pos] = s2
pos++
k2++
if k2 >= len(set2) {
copy(buffer[pos:], set1[k1:])
pos += len(set1) - k1
break
}
s2 = set2[k2]
}
}
return pos
}

View File

@@ -0,0 +1,52 @@
package roaring
type shortIterable interface {
hasNext() bool
next() uint16
}
type shortPeekable interface {
shortIterable
peekNext() uint16
advanceIfNeeded(minval uint16)
}
type shortIterator struct {
slice []uint16
loc int
}
func (si *shortIterator) hasNext() bool {
return si.loc < len(si.slice)
}
func (si *shortIterator) next() uint16 {
a := si.slice[si.loc]
si.loc++
return a
}
func (si *shortIterator) peekNext() uint16 {
return si.slice[si.loc]
}
func (si *shortIterator) advanceIfNeeded(minval uint16) {
if si.hasNext() && si.peekNext() < minval {
si.loc = advanceUntil(si.slice, si.loc, len(si.slice), minval)
}
}
type reverseIterator struct {
slice []uint16
loc int
}
func (si *reverseIterator) hasNext() bool {
return si.loc >= 0
}
func (si *reverseIterator) next() uint16 {
a := si.slice[si.loc]
si.loc--
return a
}

383
vendor/github.com/RoaringBitmap/roaring/smat.go generated vendored Normal file
View File

@@ -0,0 +1,383 @@
// +build gofuzz
/*
# Instructions for smat testing for roaring
[smat](https://github.com/mschoch/smat) is a framework that provides
state machine assisted fuzz testing.
To run the smat tests for roaring...
## Prerequisites
$ go get github.com/dvyukov/go-fuzz/go-fuzz
$ go get github.com/dvyukov/go-fuzz/go-fuzz-build
## Steps
1. Generate initial smat corpus:
```
go test -tags=gofuzz -run=TestGenerateSmatCorpus
```
2. Build go-fuzz test program with instrumentation:
```
go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring
```
3. Run go-fuzz:
```
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
```
You should see output like...
```
2016/09/16 13:58:35 slaves: 8, corpus: 1 (3s ago), crashers: 0, restarts: 1/0, execs: 0 (0/sec), cover: 0, uptime: 3s
2016/09/16 13:58:38 slaves: 8, corpus: 1 (6s ago), crashers: 0, restarts: 1/0, execs: 0 (0/sec), cover: 0, uptime: 6s
2016/09/16 13:58:41 slaves: 8, corpus: 1 (9s ago), crashers: 0, restarts: 1/44, execs: 44 (5/sec), cover: 0, uptime: 9s
2016/09/16 13:58:44 slaves: 8, corpus: 1 (12s ago), crashers: 0, restarts: 1/45, execs: 45 (4/sec), cover: 0, uptime: 12s
2016/09/16 13:58:47 slaves: 8, corpus: 1 (15s ago), crashers: 0, restarts: 1/46, execs: 46 (3/sec), cover: 0, uptime: 15s
2016/09/16 13:58:50 slaves: 8, corpus: 1 (18s ago), crashers: 0, restarts: 1/47, execs: 47 (3/sec), cover: 0, uptime: 18s
2016/09/16 13:58:53 slaves: 8, corpus: 1 (21s ago), crashers: 0, restarts: 1/63, execs: 63 (3/sec), cover: 0, uptime: 21s
2016/09/16 13:58:56 slaves: 8, corpus: 1 (24s ago), crashers: 0, restarts: 1/65, execs: 65 (3/sec), cover: 0, uptime: 24s
2016/09/16 13:58:59 slaves: 8, corpus: 1 (27s ago), crashers: 0, restarts: 1/66, execs: 66 (2/sec), cover: 0, uptime: 27s
2016/09/16 13:59:02 slaves: 8, corpus: 1 (30s ago), crashers: 0, restarts: 1/67, execs: 67 (2/sec), cover: 0, uptime: 30s
2016/09/16 13:59:05 slaves: 8, corpus: 1 (33s ago), crashers: 0, restarts: 1/83, execs: 83 (3/sec), cover: 0, uptime: 33s
2016/09/16 13:59:08 slaves: 8, corpus: 1 (36s ago), crashers: 0, restarts: 1/84, execs: 84 (2/sec), cover: 0, uptime: 36s
2016/09/16 13:59:11 slaves: 8, corpus: 2 (0s ago), crashers: 0, restarts: 1/85, execs: 85 (2/sec), cover: 0, uptime: 39s
2016/09/16 13:59:14 slaves: 8, corpus: 17 (2s ago), crashers: 0, restarts: 1/86, execs: 86 (2/sec), cover: 480, uptime: 42s
2016/09/16 13:59:17 slaves: 8, corpus: 17 (5s ago), crashers: 0, restarts: 1/66, execs: 132 (3/sec), cover: 487, uptime: 45s
2016/09/16 13:59:20 slaves: 8, corpus: 17 (8s ago), crashers: 0, restarts: 1/440, execs: 2645 (55/sec), cover: 487, uptime: 48s
```
Let it run, and if the # of crashers is > 0, check out the reports in
the workdir where you should be able to find the panic goroutine stack
traces.
*/
package roaring
import (
"fmt"
"sort"
"github.com/mschoch/smat"
"github.com/bits-and-blooms/bitset"
)
// fuzz test using state machine driven by byte stream.
func FuzzSmat(data []byte) int {
return smat.Fuzz(&smatContext{}, smat.ActionID('S'), smat.ActionID('T'),
smatActionMap, data)
}
var smatDebug = false
func smatLog(prefix, format string, args ...interface{}) {
if smatDebug {
fmt.Print(prefix)
fmt.Printf(format, args...)
}
}
type smatContext struct {
pairs []*smatPair
// Two registers, x & y.
x int
y int
actions int
}
type smatPair struct {
bm *Bitmap
bs *bitset.BitSet
}
// ------------------------------------------------------------------
var smatActionMap = smat.ActionMap{
smat.ActionID('X'): smatAction("x++", smatWrap(func(c *smatContext) { c.x++ })),
smat.ActionID('x'): smatAction("x--", smatWrap(func(c *smatContext) { c.x-- })),
smat.ActionID('Y'): smatAction("y++", smatWrap(func(c *smatContext) { c.y++ })),
smat.ActionID('y'): smatAction("y--", smatWrap(func(c *smatContext) { c.y-- })),
smat.ActionID('*'): smatAction("x*y", smatWrap(func(c *smatContext) { c.x = c.x * c.y })),
smat.ActionID('<'): smatAction("x<<", smatWrap(func(c *smatContext) { c.x = c.x << 1 })),
smat.ActionID('^'): smatAction("swap", smatWrap(func(c *smatContext) { c.x, c.y = c.y, c.x })),
smat.ActionID('['): smatAction(" pushPair", smatWrap(smatPushPair)),
smat.ActionID(']'): smatAction(" popPair", smatWrap(smatPopPair)),
smat.ActionID('B'): smatAction(" setBit", smatWrap(smatSetBit)),
smat.ActionID('b'): smatAction(" removeBit", smatWrap(smatRemoveBit)),
smat.ActionID('o'): smatAction(" or", smatWrap(smatOr)),
smat.ActionID('a'): smatAction(" and", smatWrap(smatAnd)),
smat.ActionID('#'): smatAction(" cardinality", smatWrap(smatCardinality)),
smat.ActionID('O'): smatAction(" orCardinality", smatWrap(smatOrCardinality)),
smat.ActionID('A'): smatAction(" andCardinality", smatWrap(smatAndCardinality)),
smat.ActionID('c'): smatAction(" clear", smatWrap(smatClear)),
smat.ActionID('r'): smatAction(" runOptimize", smatWrap(smatRunOptimize)),
smat.ActionID('e'): smatAction(" isEmpty", smatWrap(smatIsEmpty)),
smat.ActionID('i'): smatAction(" intersects", smatWrap(smatIntersects)),
smat.ActionID('f'): smatAction(" flip", smatWrap(smatFlip)),
smat.ActionID('-'): smatAction(" difference", smatWrap(smatDifference)),
}
var smatRunningPercentActions []smat.PercentAction
func init() {
var ids []int
for actionId := range smatActionMap {
ids = append(ids, int(actionId))
}
sort.Ints(ids)
pct := 100 / len(smatActionMap)
for _, actionId := range ids {
smatRunningPercentActions = append(smatRunningPercentActions,
smat.PercentAction{pct, smat.ActionID(actionId)})
}
smatActionMap[smat.ActionID('S')] = smatAction("SETUP", smatSetupFunc)
smatActionMap[smat.ActionID('T')] = smatAction("TEARDOWN", smatTeardownFunc)
}
// We only have one smat state: running.
func smatRunning(next byte) smat.ActionID {
return smat.PercentExecute(next, smatRunningPercentActions...)
}
func smatAction(name string, f func(ctx smat.Context) (smat.State, error)) func(smat.Context) (smat.State, error) {
return func(ctx smat.Context) (smat.State, error) {
c := ctx.(*smatContext)
c.actions++
smatLog(" ", "%s\n", name)
return f(ctx)
}
}
// Creates an smat action func based on a simple callback.
func smatWrap(cb func(c *smatContext)) func(smat.Context) (next smat.State, err error) {
return func(ctx smat.Context) (next smat.State, err error) {
c := ctx.(*smatContext)
cb(c)
return smatRunning, nil
}
}
// Invokes a callback function with the input v bounded to len(c.pairs).
func (c *smatContext) withPair(v int, cb func(*smatPair)) {
if len(c.pairs) > 0 {
if v < 0 {
v = -v
}
v = v % len(c.pairs)
cb(c.pairs[v])
}
}
// ------------------------------------------------------------------
func smatSetupFunc(ctx smat.Context) (next smat.State, err error) {
return smatRunning, nil
}
func smatTeardownFunc(ctx smat.Context) (next smat.State, err error) {
return nil, err
}
// ------------------------------------------------------------------
func smatPushPair(c *smatContext) {
c.pairs = append(c.pairs, &smatPair{
bm: NewBitmap(),
bs: bitset.New(100),
})
}
func smatPopPair(c *smatContext) {
if len(c.pairs) > 0 {
c.pairs = c.pairs[0 : len(c.pairs)-1]
}
}
func smatSetBit(c *smatContext) {
c.withPair(c.x, func(p *smatPair) {
y := uint32(c.y)
p.bm.AddInt(int(y))
p.bs.Set(uint(y))
p.checkEquals()
})
}
func smatRemoveBit(c *smatContext) {
c.withPair(c.x, func(p *smatPair) {
y := uint32(c.y)
p.bm.Remove(y)
p.bs.Clear(uint(y))
p.checkEquals()
})
}
func smatAnd(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
c.withPair(c.y, func(py *smatPair) {
px.bm.And(py.bm)
px.bs = px.bs.Intersection(py.bs)
px.checkEquals()
py.checkEquals()
})
})
}
func smatOr(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
c.withPair(c.y, func(py *smatPair) {
px.bm.Or(py.bm)
px.bs = px.bs.Union(py.bs)
px.checkEquals()
py.checkEquals()
})
})
}
func smatAndCardinality(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
c.withPair(c.y, func(py *smatPair) {
c0 := px.bm.AndCardinality(py.bm)
c1 := px.bs.IntersectionCardinality(py.bs)
if c0 != uint64(c1) {
panic("expected same add cardinality")
}
px.checkEquals()
py.checkEquals()
})
})
}
func smatOrCardinality(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
c.withPair(c.y, func(py *smatPair) {
c0 := px.bm.OrCardinality(py.bm)
c1 := px.bs.UnionCardinality(py.bs)
if c0 != uint64(c1) {
panic("expected same or cardinality")
}
px.checkEquals()
py.checkEquals()
})
})
}
func smatRunOptimize(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
px.bm.RunOptimize()
px.checkEquals()
})
}
func smatClear(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
px.bm.Clear()
px.bs = px.bs.ClearAll()
px.checkEquals()
})
}
func smatCardinality(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
c0 := px.bm.GetCardinality()
c1 := px.bs.Count()
if c0 != uint64(c1) {
panic("expected same cardinality")
}
})
}
func smatIsEmpty(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
c0 := px.bm.IsEmpty()
c1 := px.bs.None()
if c0 != c1 {
panic("expected same is empty")
}
})
}
func smatIntersects(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
c.withPair(c.y, func(py *smatPair) {
v0 := px.bm.Intersects(py.bm)
v1 := px.bs.IntersectionCardinality(py.bs) > 0
if v0 != v1 {
panic("intersects not equal")
}
px.checkEquals()
py.checkEquals()
})
})
}
func smatFlip(c *smatContext) {
c.withPair(c.x, func(p *smatPair) {
y := uint32(c.y)
p.bm.Flip(uint64(y), uint64(y)+1)
p.bs = p.bs.Flip(uint(y))
p.checkEquals()
})
}
func smatDifference(c *smatContext) {
c.withPair(c.x, func(px *smatPair) {
c.withPair(c.y, func(py *smatPair) {
px.bm.AndNot(py.bm)
px.bs = px.bs.Difference(py.bs)
px.checkEquals()
py.checkEquals()
})
})
}
func (p *smatPair) checkEquals() {
if !p.equalsBitSet(p.bs, p.bm) {
panic("bitset mismatch")
}
}
func (p *smatPair) equalsBitSet(a *bitset.BitSet, b *Bitmap) bool {
for i, e := a.NextSet(0); e; i, e = a.NextSet(i + 1) {
if !b.ContainsInt(int(i)) {
fmt.Printf("in a bitset, not b bitmap, i: %d\n", i)
fmt.Printf(" a bitset: %s\n b bitmap: %s\n",
a.String(), b.String())
return false
}
}
i := b.Iterator()
for i.HasNext() {
v := i.Next()
if !a.Test(uint(v)) {
fmt.Printf("in b bitmap, not a bitset, v: %d\n", v)
fmt.Printf(" a bitset: %s\n b bitmap: %s\n",
a.String(), b.String())
return false
}
}
return true
}

305
vendor/github.com/RoaringBitmap/roaring/util.go generated vendored Normal file
View File

@@ -0,0 +1,305 @@
package roaring
import (
"math"
"math/rand"
"sort"
)
const (
arrayDefaultMaxSize = 4096 // containers with 4096 or fewer integers should be array containers.
arrayLazyLowerBound = 1024
maxCapacity = 1 << 16
serialCookieNoRunContainer = 12346 // only arrays and bitmaps
invalidCardinality = -1
serialCookie = 12347 // runs, arrays, and bitmaps
noOffsetThreshold = 4
// MaxUint32 is the largest uint32 value.
MaxUint32 = math.MaxUint32
// MaxRange is one more than the maximum allowed bitmap bit index. For use as an upper
// bound for ranges.
MaxRange uint64 = MaxUint32 + 1
// MaxUint16 is the largest 16 bit unsigned int.
// This is the largest value an interval16 can store.
MaxUint16 = math.MaxUint16
// Compute wordSizeInBytes, the size of a word in bytes.
_m = ^uint64(0)
_logS = _m>>8&1 + _m>>16&1 + _m>>32&1
wordSizeInBytes = 1 << _logS
// other constants used in ctz_generic.go
wordSizeInBits = wordSizeInBytes << 3 // word size in bits
)
const maxWord = 1<<wordSizeInBits - 1
// doesn't apply to runContainers
func getSizeInBytesFromCardinality(card int) int {
if card > arrayDefaultMaxSize {
// bitmapContainer
return maxCapacity / 8
}
// arrayContainer
return 2 * card
}
func fill(arr []uint64, val uint64) {
for i := range arr {
arr[i] = val
}
}
func fillRange(arr []uint64, start, end int, val uint64) {
for i := start; i < end; i++ {
arr[i] = val
}
}
func fillArrayAND(container []uint16, bitmap1, bitmap2 []uint64) {
if len(bitmap1) != len(bitmap2) {
panic("array lengths don't match")
}
// TODO: rewrite in assembly
pos := 0
for k := range bitmap1 {
bitset := bitmap1[k] & bitmap2[k]
for bitset != 0 {
t := bitset & -bitset
container[pos] = uint16((k*64 + int(popcount(t-1))))
pos = pos + 1
bitset ^= t
}
}
}
func fillArrayANDNOT(container []uint16, bitmap1, bitmap2 []uint64) {
if len(bitmap1) != len(bitmap2) {
panic("array lengths don't match")
}
// TODO: rewrite in assembly
pos := 0
for k := range bitmap1 {
bitset := bitmap1[k] &^ bitmap2[k]
for bitset != 0 {
t := bitset & -bitset
container[pos] = uint16((k*64 + int(popcount(t-1))))
pos = pos + 1
bitset ^= t
}
}
}
func fillArrayXOR(container []uint16, bitmap1, bitmap2 []uint64) {
if len(bitmap1) != len(bitmap2) {
panic("array lengths don't match")
}
// TODO: rewrite in assembly
pos := 0
for k := 0; k < len(bitmap1); k++ {
bitset := bitmap1[k] ^ bitmap2[k]
for bitset != 0 {
t := bitset & -bitset
container[pos] = uint16((k*64 + int(popcount(t-1))))
pos = pos + 1
bitset ^= t
}
}
}
func highbits(x uint32) uint16 {
return uint16(x >> 16)
}
func lowbits(x uint32) uint16 {
return uint16(x & maxLowBit)
}
const maxLowBit = 0xFFFF
func flipBitmapRange(bitmap []uint64, start int, end int) {
if start >= end {
return
}
firstword := start / 64
endword := (end - 1) / 64
bitmap[firstword] ^= ^(^uint64(0) << uint(start%64))
for i := firstword; i < endword; i++ {
bitmap[i] = ^bitmap[i]
}
bitmap[endword] ^= ^uint64(0) >> (uint(-end) % 64)
}
func resetBitmapRange(bitmap []uint64, start int, end int) {
if start >= end {
return
}
firstword := start / 64
endword := (end - 1) / 64
if firstword == endword {
bitmap[firstword] &= ^((^uint64(0) << uint(start%64)) & (^uint64(0) >> (uint(-end) % 64)))
return
}
bitmap[firstword] &= ^(^uint64(0) << uint(start%64))
for i := firstword + 1; i < endword; i++ {
bitmap[i] = 0
}
bitmap[endword] &= ^(^uint64(0) >> (uint(-end) % 64))
}
func setBitmapRange(bitmap []uint64, start int, end int) {
if start >= end {
return
}
firstword := start / 64
endword := (end - 1) / 64
if firstword == endword {
bitmap[firstword] |= (^uint64(0) << uint(start%64)) & (^uint64(0) >> (uint(-end) % 64))
return
}
bitmap[firstword] |= ^uint64(0) << uint(start%64)
for i := firstword + 1; i < endword; i++ {
bitmap[i] = ^uint64(0)
}
bitmap[endword] |= ^uint64(0) >> (uint(-end) % 64)
}
func flipBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int {
before := wordCardinalityForBitmapRange(bitmap, start, end)
flipBitmapRange(bitmap, start, end)
after := wordCardinalityForBitmapRange(bitmap, start, end)
return int(after - before)
}
func resetBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int {
before := wordCardinalityForBitmapRange(bitmap, start, end)
resetBitmapRange(bitmap, start, end)
after := wordCardinalityForBitmapRange(bitmap, start, end)
return int(after - before)
}
func setBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int {
before := wordCardinalityForBitmapRange(bitmap, start, end)
setBitmapRange(bitmap, start, end)
after := wordCardinalityForBitmapRange(bitmap, start, end)
return int(after - before)
}
func wordCardinalityForBitmapRange(bitmap []uint64, start int, end int) uint64 {
answer := uint64(0)
if start >= end {
return answer
}
firstword := start / 64
endword := (end - 1) / 64
for i := firstword; i <= endword; i++ {
answer += popcount(bitmap[i])
}
return answer
}
func selectBitPosition(w uint64, j int) int {
seen := 0
// Divide 64bit
part := w & 0xFFFFFFFF
n := popcount(part)
if n <= uint64(j) {
part = w >> 32
seen += 32
j -= int(n)
}
w = part
// Divide 32bit
part = w & 0xFFFF
n = popcount(part)
if n <= uint64(j) {
part = w >> 16
seen += 16
j -= int(n)
}
w = part
// Divide 16bit
part = w & 0xFF
n = popcount(part)
if n <= uint64(j) {
part = w >> 8
seen += 8
j -= int(n)
}
w = part
// Lookup in final byte
var counter uint
for counter = 0; counter < 8; counter++ {
j -= int((w >> counter) & 1)
if j < 0 {
break
}
}
return seen + int(counter)
}
func panicOn(err error) {
if err != nil {
panic(err)
}
}
type ph struct {
orig int
rand int
}
type pha []ph
func (p pha) Len() int { return len(p) }
func (p pha) Less(i, j int) bool { return p[i].rand < p[j].rand }
func (p pha) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func getRandomPermutation(n int) []int {
r := make([]ph, n)
for i := 0; i < n; i++ {
r[i].orig = i
r[i].rand = rand.Intn(1 << 29)
}
sort.Sort(pha(r))
m := make([]int, n)
for i := range m {
m[i] = r[i].orig
}
return m
}
func minOfInt(a, b int) int {
if a < b {
return a
}
return b
}
func maxOfInt(a, b int) int {
if a > b {
return a
}
return b
}
func maxOfUint16(a, b uint16) uint16 {
if a > b {
return a
}
return b
}
func minOfUint16(a, b uint16) uint16 {
if a < b {
return a
}
return b
}