Spaces:
Running
Running
File size: 4,388 Bytes
b110593 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: hello@weaviate.io
//
package inverted
import (
"context"
"encoding/binary"
"fmt"
"github.com/pkg/errors"
"github.com/weaviate/sroar"
"github.com/weaviate/weaviate/adapters/repos/db/lsmkv"
"github.com/weaviate/weaviate/entities/filters"
)
// docBitmap resolves a single property/value filter clause (pv) into a
// docBitmap by dispatching to the lookup strategy that matches the clause's
// operator and the indexes flagged on pv.
//
// An error is returned when pv is neither a geo-range clause nor backed by a
// filterable or searchable index, or when the selected lookup fails.
func (s *Searcher) docBitmap(ctx context.Context, b *lsmkv.Bucket, limit int,
	pv *propValuePair,
) (docBitmap, error) {
	switch {
	case pv.operator == filters.OperatorWithinGeoRange:
		// Geo props are not served by the inverted index; this part of the
		// filter is delegated to the external geo index instead.
		return s.docBitmapGeo(ctx, pv)

	case pv.hasFilterableIndex:
		// Every remaining operator works on the inverted index directly.
		if b.Strategy() != lsmkv.StrategyRoaringSet {
			// Set strategy: the bucket yields doc ids from which the bitmap
			// is assembled.
			return s.docBitmapInvertedSet(ctx, b, limit, pv)
		}
		// Roaring set strategy: the bucket yields bitmaps as-is.
		return s.docBitmapInvertedRoaringSet(ctx, b, limit, pv)

	case pv.hasSearchableIndex:
		// Map strategy: the bucket yields doc ids together with frequencies;
		// the frequencies play no role in filtering.
		return s.docBitmapInvertedMap(ctx, b, limit, pv)

	default:
		return docBitmap{}, fmt.Errorf("property '%s' is neither filterable nor searchable", pv.prop)
	}
}
// docBitmapInvertedRoaringSet builds the result by reading rows for
// pv.value / pv.operator from b through a roaring-set row reader and
// combining the bitmaps of all rows that were read.
//
// The bitmap of the first row is taken over by pointer (not copied); bitmaps
// of later rows are Or-ed into it. When limit is positive, reading is stopped
// once the combined cardinality reaches limit. If no row is read at all, a
// fresh docBitmap from newDocBitmap is returned.
func (s *Searcher) docBitmapInvertedRoaringSet(ctx context.Context, b *lsmkv.Bucket,
	limit int, pv *propValuePair,
) (docBitmap, error) {
	result := newUninitializedDocBitmap()
	sawRow := false

	var collect RoaringSetReadFn = func(_ []byte, rowIDs *sroar.Bitmap) (bool, error) {
		if sawRow {
			result.docIDs.Or(rowIDs)
		} else {
			// result has no bitmap of its own yet, so adopt the first row's.
			result.docIDs = rowIDs
			sawRow = true
		}

		// The returned bool tells the reader whether to keep going.
		limitReached := limit > 0 && result.docIDs.GetCardinality() >= limit
		return !limitReached, nil
	}

	reader := NewRowReaderRoaringSet(b, pv.value, pv.operator, false)
	if err := reader.Read(ctx, collect); err != nil {
		return result, errors.Wrap(err, "read row")
	}

	if !sawRow {
		return newDocBitmap(), nil
	}
	return result, nil
}
// docBitmapInvertedSet builds the result by reading rows for
// pv.value / pv.operator from b through a row reader and setting every doc id
// found in those rows on the output bitmap.
//
// Each id is decoded from its byte form as a little-endian uint64. When limit
// is positive, reading is stopped once the bitmap's cardinality reaches
// limit.
func (s *Searcher) docBitmapInvertedSet(ctx context.Context, b *lsmkv.Bucket,
	limit int, pv *propValuePair,
) (docBitmap, error) {
	result := newDocBitmap()

	var collect ReadFn = func(_ []byte, rawIDs [][]byte) (bool, error) {
		for i := range rawIDs {
			result.docIDs.Set(binary.LittleEndian.Uint64(rawIDs[i]))
		}

		// No positive limit: always ask for the next row.
		if limit <= 0 {
			return true, nil
		}
		return result.docIDs.GetCardinality() < limit, nil
	}

	reader := NewRowReader(b, pv.value, pv.operator, false)
	if err := reader.Read(ctx, collect); err != nil {
		return result, errors.Wrap(err, "read row")
	}
	return result, nil
}
// docBitmapInvertedMap builds the result by reading rows for
// pv.value / pv.operator from b through a frequency row reader and setting
// the doc id stored in each map pair's key on the output bitmap.
//
// Keys are decoded as little-endian uint64 when s.shardVersion is below 2 and
// as big-endian uint64 otherwise. When limit is positive, reading is stopped
// once the bitmap's cardinality reaches limit.
func (s *Searcher) docBitmapInvertedMap(ctx context.Context, b *lsmkv.Bucket,
	limit int, pv *propValuePair,
) (docBitmap, error) {
	result := newDocBitmap()

	var collect ReadFnFrequency = func(_ []byte, pairs []lsmkv.MapPair) (bool, error) {
		for i := range pairs {
			// Each pair also carries a frequency, which only matters for
			// bm25 scoring; plain filtering needs nothing but the doc id.
			key := pairs[i].Key

			var docID uint64
			if s.shardVersion >= 2 {
				docID = binary.BigEndian.Uint64(key)
			} else {
				docID = binary.LittleEndian.Uint64(key)
			}
			result.docIDs.Set(docID)
		}

		// No positive limit: always ask for the next row.
		if limit <= 0 {
			return true, nil
		}
		return result.docIDs.GetCardinality() < limit, nil
	}

	reader := NewRowReaderFrequency(b, pv.value, pv.operator, false, s.shardVersion)
	if err := reader.Read(ctx, collect); err != nil {
		return result, errors.Wrap(err, "read row")
	}
	return result, nil
}
// docBitmapGeo answers a within-geo-range clause by querying the geo index
// registered for pv.prop and placing the returned doc ids on a new bitmap.
//
// If s.propIndices has no entry for pv.prop, an empty bitmap and a nil error
// are returned. An error is returned when pv carries no geo range value or
// when the geo index lookup itself fails.
func (s *Searcher) docBitmapGeo(ctx context.Context, pv *propValuePair) (docBitmap, error) {
	out := newDocBitmap()

	propIndex, ok := s.propIndices.ByProp(pv.prop)
	if !ok {
		return out, nil
	}

	// Dereferencing a nil range below would panic; surface a regular error
	// instead. The check sits after the lookup above so that the
	// "no index -> empty result" path behaves exactly as before.
	if pv.valueGeoRange == nil {
		return out, errors.Errorf("geo index range search on prop %q: missing geo range value", pv.prop)
	}

	res, err := propIndex.GeoIndex.WithinRange(ctx, *pv.valueGeoRange)
	if err != nil {
		return out, errors.Wrapf(err, "geo index range search on prop %q", pv.prop)
	}

	out.docIDs.SetMany(res)
	return out, nil
}
|