Spaces:
Running
Running
File size: 5,310 Bytes
b110593 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: [email protected]
//
package inverted
import (
"context"
"fmt"
"strings"
"github.com/pkg/errors"
"github.com/weaviate/weaviate/adapters/repos/db/helpers"
"github.com/weaviate/weaviate/adapters/repos/db/lsmkv/roaringset"
"github.com/weaviate/weaviate/entities/filters"
"github.com/weaviate/weaviate/entities/models"
"golang.org/x/sync/errgroup"
)
// propValuePair is one node of a filter tree. A node whose operator reports
// OnValue() is a leaf that compares a single property against a value; any
// other node combines the results of its children.
type propValuePair struct {
// name of the property the filter applies to; used to derive the name of
// the bucket that is queried
prop string
// decides whether this node is a value leaf (operator.OnValue()) or a
// nested node that combines its children
operator filters.Operator
// set for all values that can be served by an inverted index, i.e. anything
// that's not a geoRange
value []byte
// only set if operator=OperatorWithinGeoRange, as that cannot be served by a
// byte value from an inverted index
valueGeoRange *filters.GeoRange
// doc ids matching this node; assigned by fetchDocIDs for value operators
docIDs docBitmap
// sub-filters of a nested node; fetched and merged recursively
children []*propValuePair
// index flags used to pick the bucket to read from; the filterable bucket
// is preferred when both are set
hasFilterableIndex bool
hasSearchableIndex bool
Class *models.Class // The schema; its InvertedIndexConfig is consulted before querying
}
// newPropValuePair builds an empty filter node bound to the given class.
// The node starts out with a fresh doc bitmap. A nil class is rejected with
// an error, in which case the returned pair is nil.
func newPropValuePair(class *models.Class) (*propValuePair, error) {
	if class == nil {
		return nil, errors.Errorf("class must not be nil")
	}

	pair := new(propValuePair)
	pair.docIDs = newDocBitmap()
	pair.Class = class
	return pair, nil
}
// fetchDocIDs resolves the doc ids for this filter node and, recursively, for
// all of its children.
//
// For nested (non-value) operators every child is fetched concurrently with a
// limit of 0. For value operators the method first verifies that the index
// feature required by the filter is enabled in the class' inverted index
// config, then picks the bucket (filterable before searchable) and stores the
// bitmap returned by the searcher in pv.docIDs.
//
// limit is passed on to the searcher for value operators only.
func (pv *propValuePair) fetchDocIDs(s *Searcher, limit int) error {
	if !pv.operator.OnValue() {
		var eg errgroup.Group
		// prevent unbounded concurrency, see
		// https://github.com/weaviate/weaviate/issues/3179 for details
		eg.SetLimit(2 * _NUMCPU)

		for idx, nested := range pv.children {
			idx, nested := idx, nested
			eg.Go(func() error {
				// Nested filters are always fetched with limit 0 (=unlimited).
				// If every subfilter were cut off at the limit on its own, the
				// merged result could end up with fewer than limit entries.
				if err := nested.fetchDocIDs(s, 0); err != nil {
					return errors.Wrapf(err, "nested child %d", idx)
				}
				return nil
			})
		}

		if err := eg.Wait(); err != nil {
			return fmt.Errorf("nested query: %w", err)
		}
		return nil
	}

	// Reject filters that rely on an index feature which is switched off in
	// the schema. The cases are checked top to bottom; the first hit wins.
	// TODO text_rbm_inverted_index find better way check whether prop len
	switch {
	case strings.HasSuffix(pv.prop, filters.InternalPropertyLength) &&
		!pv.Class.InvertedIndexConfig.IndexPropertyLength:
		return errors.Errorf("Property length must be indexed to be filterable! add `IndexPropertyLength: true` to the invertedIndexConfig in %v. Geo-coordinates, phone numbers and data blobs are not supported by property length.", pv.Class.Class)

	case pv.operator == filters.OperatorIsNull &&
		!pv.Class.InvertedIndexConfig.IndexNullState:
		return errors.Errorf("Nullstate must be indexed to be filterable! Add `indexNullState: true` to the invertedIndexConfig")

	case (pv.prop == filters.InternalPropCreationTimeUnix ||
		pv.prop == filters.InternalPropLastUpdateTimeUnix) &&
		!pv.Class.InvertedIndexConfig.IndexTimestamps:
		return errors.Errorf("Timestamps must be indexed to be filterable! Add `IndexTimestamps: true` to the InvertedIndexConfig in %v", pv.Class.Class)
	}

	// The filterable bucket takes precedence over the searchable one.
	var bucketName string
	switch {
	case pv.hasFilterableIndex:
		bucketName = helpers.BucketFromPropNameLSM(pv.prop)
	case pv.hasSearchableIndex:
		bucketName = helpers.BucketSearchableFromPropNameLSM(pv.prop)
	default:
		return errors.Errorf("bucket for prop %s not found - is it indexed?", pv.prop)
	}

	bucket := s.store.Bucket(bucketName)
	// TODO: this check can probably be removed entirely. The bucket should
	// never be nil, and routines should check whether their particular feature
	// is active in the schema. However, not all routines have such checks yet.
	//
	// A nil bucket is acceptable for a WithinGeoRange filter, as that query is
	// not served by the inverted index, but propagated to a secondary index in
	// .docPointers()
	if bucket == nil && pv.operator != filters.OperatorWithinGeoRange {
		return errors.Errorf("bucket for prop %s not found - is it indexed?", pv.prop)
	}

	// TODO: pass the context through instead of spawning a new one
	bitmap, err := s.docBitmap(context.TODO(), bucket, limit, pv)
	if err != nil {
		return err
	}
	pv.docIDs = bitmap
	return nil
}
// mergeDocIDs collapses the filter tree rooted at this node into a single doc
// bitmap.
//
// A value operator returns a pointer to its own docIDs. A nested node must
// use the And or Or operator and have at least one child; the children are
// merged recursively and then combined by intersection (And) or union (Or).
// The combination is done on a clone of the first child's bitmap and the
// result is condensed before it is returned.
func (pv *propValuePair) mergeDocIDs() (*docBitmap, error) {
	if pv.operator.OnValue() {
		return &pv.docIDs, nil
	}

	switch pv.operator {
	case filters.OperatorAnd, filters.OperatorOr:
		// the only operators that can combine children
	default:
		return nil, fmt.Errorf("unsupported operator: %s", pv.operator.Name())
	}

	if len(pv.children) == 0 {
		return nil, fmt.Errorf("no children for operator: %s", pv.operator.Name())
	}

	childBitmaps := make([]*docBitmap, 0, len(pv.children))
	for idx, child := range pv.children {
		bm, err := child.mergeDocIDs()
		if err != nil {
			return nil, errors.Wrapf(err, "retrieve doc bitmap of child %d", idx)
		}
		childBitmaps = append(childBitmaps, bm)
	}

	// Work on a clone so the first child's bitmap is left untouched.
	merged := childBitmaps[0].docIDs.Clone()
	useUnion := pv.operator == filters.OperatorOr
	for _, bm := range childBitmaps[1:] {
		if useUnion {
			merged.Or(bm.docIDs)
		} else {
			merged.And(bm.docIDs)
		}
	}

	result := &docBitmap{docIDs: roaringset.Condense(merged)}
	return result, nil
}
|