KevinStephenson
Adding in weaviate code
b110593
raw
history blame
5.31 kB
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: [email protected]
//
package inverted
import (
"context"
"fmt"
"strings"
"github.com/pkg/errors"
"github.com/weaviate/weaviate/adapters/repos/db/helpers"
"github.com/weaviate/weaviate/adapters/repos/db/lsmkv/roaringset"
"github.com/weaviate/weaviate/entities/filters"
"github.com/weaviate/weaviate/entities/models"
"golang.org/x/sync/errgroup"
)
type propValuePair struct {
prop string
operator filters.Operator
// set for all values that can be served by an inverted index, i.e. anything
// that's not a geoRange
value []byte
// only set if operator=OperatorWithinGeoRange, as that cannot be served by a
// byte value from an inverted index
valueGeoRange *filters.GeoRange
docIDs docBitmap
children []*propValuePair
hasFilterableIndex bool
hasSearchableIndex bool
Class *models.Class // The schema
}
func newPropValuePair(class *models.Class) (*propValuePair, error) {
if class == nil {
return nil, errors.Errorf("class must not be nil")
}
return &propValuePair{docIDs: newDocBitmap(), Class: class}, nil
}
func (pv *propValuePair) fetchDocIDs(s *Searcher, limit int) error {
if pv.operator.OnValue() {
// TODO text_rbm_inverted_index find better way check whether prop len
if strings.HasSuffix(pv.prop, filters.InternalPropertyLength) &&
!pv.Class.InvertedIndexConfig.IndexPropertyLength {
return errors.Errorf("Property length must be indexed to be filterable! add `IndexPropertyLength: true` to the invertedIndexConfig in %v. Geo-coordinates, phone numbers and data blobs are not supported by property length.", pv.Class.Class)
}
if pv.operator == filters.OperatorIsNull && !pv.Class.InvertedIndexConfig.IndexNullState {
return errors.Errorf("Nullstate must be indexed to be filterable! Add `indexNullState: true` to the invertedIndexConfig")
}
if (pv.prop == filters.InternalPropCreationTimeUnix ||
pv.prop == filters.InternalPropLastUpdateTimeUnix) &&
!pv.Class.InvertedIndexConfig.IndexTimestamps {
return errors.Errorf("Timestamps must be indexed to be filterable! Add `IndexTimestamps: true` to the InvertedIndexConfig in %v", pv.Class.Class)
}
var bucketName string
if pv.hasFilterableIndex {
bucketName = helpers.BucketFromPropNameLSM(pv.prop)
} else if pv.hasSearchableIndex {
bucketName = helpers.BucketSearchableFromPropNameLSM(pv.prop)
} else {
return errors.Errorf("bucket for prop %s not found - is it indexed?", pv.prop)
}
b := s.store.Bucket(bucketName)
// TODO: I think we can delete this check entirely. The bucket will never be nill, and routines should now check if their particular feature is active in the schema. However, not all those routines have checks yet.
if b == nil && pv.operator != filters.OperatorWithinGeoRange {
// a nil bucket is ok for a WithinGeoRange filter, as this query is not
// served by the inverted index, but propagated to a secondary index in
// .docPointers()
return errors.Errorf("bucket for prop %s not found - is it indexed?", pv.prop)
}
ctx := context.TODO() // TODO: pass through instead of spawning new
dbm, err := s.docBitmap(ctx, b, limit, pv)
if err != nil {
return err
}
pv.docIDs = dbm
} else {
eg := errgroup.Group{}
// prevent unbounded concurrency, see
// https://github.com/weaviate/weaviate/issues/3179 for details
eg.SetLimit(2 * _NUMCPU)
for i, child := range pv.children {
i, child := i, child
eg.Go(func() error {
// Explicitly set the limit to 0 (=unlimited) as this is a nested filter,
// otherwise we run into situations where each subfilter on their own
// runs into the limit, possibly yielding in "less than limit" results
// after merging.
err := child.fetchDocIDs(s, 0)
if err != nil {
return errors.Wrapf(err, "nested child %d", i)
}
return nil
})
}
if err := eg.Wait(); err != nil {
return fmt.Errorf("nested query: %w", err)
}
}
return nil
}
func (pv *propValuePair) mergeDocIDs() (*docBitmap, error) {
if pv.operator.OnValue() {
return &pv.docIDs, nil
}
if pv.operator != filters.OperatorAnd && pv.operator != filters.OperatorOr {
return nil, fmt.Errorf("unsupported operator: %s", pv.operator.Name())
}
if len(pv.children) == 0 {
return nil, fmt.Errorf("no children for operator: %s", pv.operator.Name())
}
dbms := make([]*docBitmap, len(pv.children))
for i, child := range pv.children {
dbm, err := child.mergeDocIDs()
if err != nil {
return nil, errors.Wrapf(err, "retrieve doc bitmap of child %d", i)
}
dbms[i] = dbm
}
mergeRes := dbms[0].docIDs.Clone()
mergeFn := mergeRes.And
if pv.operator == filters.OperatorOr {
mergeFn = mergeRes.Or
}
for i := 1; i < len(dbms); i++ {
mergeFn(dbms[i].docIDs)
}
return &docBitmap{
docIDs: roaringset.Condense(mergeRes),
}, nil
}