//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: [email protected]
//

package inverted

import (
	"context"
	"encoding/binary"
	"fmt"
	"strconv"
	"time"

	"github.com/google/uuid"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"github.com/weaviate/sroar"
	"github.com/weaviate/weaviate/adapters/repos/db/helpers"
	"github.com/weaviate/weaviate/adapters/repos/db/inverted/stopwords"
	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv"
	"github.com/weaviate/weaviate/adapters/repos/db/propertyspecific"
	"github.com/weaviate/weaviate/adapters/repos/db/sorter"
	"github.com/weaviate/weaviate/entities/additional"
	"github.com/weaviate/weaviate/entities/filters"
	"github.com/weaviate/weaviate/entities/inverted"
	"github.com/weaviate/weaviate/entities/models"
	"github.com/weaviate/weaviate/entities/schema"
	"github.com/weaviate/weaviate/entities/storobj"
	"golang.org/x/sync/errgroup"
)

type Searcher struct {
	logger                 logrus.FieldLogger
	store                  *lsmkv.Store
	schema                 schema.Schema
	classSearcher          ClassSearcher // to allow recursive searches on ref-props
	propIndices            propertyspecific.Indices
	stopwords              stopwords.StopwordDetector
	shardVersion           uint16
	isFallbackToSearchable IsFallbackToSearchable
	tenant                 string
	// nestedCrossRefLimit limits the number of nested cross refs returned for a query
	nestedCrossRefLimit int64
}

func NewSearcher(logger logrus.FieldLogger, store *lsmkv.Store,
	schema schema.Schema, propIndices propertyspecific.Indices,
	classSearcher ClassSearcher, stopwords stopwords.StopwordDetector,
	shardVersion uint16, isFallbackToSearchable IsFallbackToSearchable,
	tenant string, nestedCrossRefLimit int64,
) *Searcher {
	return &Searcher{
		logger:                 logger,
		store:                  store,
		schema:                 schema,
		propIndices:            propIndices,
		classSearcher:          classSearcher,
		stopwords:              stopwords,
		shardVersion:           shardVersion,
		isFallbackToSearchable: isFallbackToSearchable,
		tenant:                 tenant,
		nestedCrossRefLimit:    nestedCrossRefLimit,
	}
}
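
// A minimal wiring sketch (illustrative only, not part of the production code
// path; `logger`, `store`, `sch`, `classSearcher`, `sw`, `fallback`, `ctx`,
// `filter` and the literal arguments are assumptions made for the example):
//
//	searcher := NewSearcher(logger, store, sch, nil, classSearcher, sw,
//		2, fallback, "", 100000)
//	objs, err := searcher.Objects(ctx, 10, filter, nil,
//		additional.Properties{}, "Article")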

// Objects returns a list of full objects
func (s *Searcher) Objects(ctx context.Context, limit int,
	filter *filters.LocalFilter, sort []filters.Sort, additional additional.Properties,
	className schema.ClassName,
) ([]*storobj.Object, error) {
	allowList, err := s.docIDs(ctx, filter, additional, className, limit)
	if err != nil {
		return nil, err
	}

	var it docIDsIterator
	if len(sort) > 0 {
		docIDs, err := s.sort(ctx, limit, sort, allowList, additional, className)
		if err != nil {
			return nil, errors.Wrap(err, "sort doc ids")
		}
		it = newSliceDocIDsIterator(docIDs)
	} else {
		it = allowList.LimitedIterator(limit)
	}

	return s.objectsByDocID(it, additional)
}

func (s *Searcher) sort(ctx context.Context, limit int, sort []filters.Sort, docIDs helpers.AllowList,
	additional additional.Properties, className schema.ClassName,
) ([]uint64, error) {
	lsmSorter, err := sorter.NewLSMSorter(s.store, s.schema, className)
	if err != nil {
		return nil, err
	}
	return lsmSorter.SortDocIDs(ctx, limit, sort, docIDs)
}
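
// A sketch of a sort specification as it would arrive here (the field names
// of filters.Sort, the property "title" and the order literal are assumptions
// for the example only):
//
//	sortSpec := []filters.Sort{{Path: []string{"title"}, Order: "asc"}}
//	ids, err := s.sort(ctx, 10, sortSpec, allowList, additional.Properties{}, "Article")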

func (s *Searcher) objectsByDocID(it docIDsIterator,
	additional additional.Properties,
) ([]*storobj.Object, error) {
	bucket := s.store.Bucket(helpers.ObjectsBucketLSM)
	if bucket == nil {
		return nil, errors.Errorf("objects bucket not found")
	}

	out := make([]*storobj.Object, it.Len())
	docIDBytes := make([]byte, 8)

	i := 0
	for docID, ok := it.Next(); ok; docID, ok = it.Next() {
		binary.LittleEndian.PutUint64(docIDBytes, docID)
		res, err := bucket.GetBySecondary(0, docIDBytes)
		if err != nil {
			return nil, err
		}

		if res == nil {
			continue
		}

		var unmarshalled *storobj.Object
		if additional.ReferenceQuery {
			unmarshalled, err = storobj.FromBinaryUUIDOnly(res)
		} else {
			unmarshalled, err = storobj.FromBinaryOptional(res, additional)
		}
		if err != nil {
			return nil, errors.Wrapf(err, "unmarshal data object at position %d", i)
		}

		out[i] = unmarshalled
		i++
	}

	return out[:i], nil
}

// DocIDs is similar to Objects, but does not actually resolve the docIDs to
// full objects. Instead it returns the pure object ID pointers. They can then
// be used in a secondary index (e.g. a vector index).
//
// DocID queries do not take a limit by design, as we cannot know whether the
// limit would remove exactly the item that matters most for the follow-up
// query. Imagine the user sets the limit to 1 and the follow-up is a vector
// search: if we had already limited the allowList to 1, the vector search
// would be pointless, as only that first element would be allowed, regardless
// of which element has the shortest distance.
func (s *Searcher) DocIDs(ctx context.Context, filter *filters.LocalFilter,
	additional additional.Properties, className schema.ClassName,
) (helpers.AllowList, error) {
	return s.docIDs(ctx, filter, additional, className, 0)
}
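
// A usage sketch of the allow-list flow (illustrative only; `vectorIndex` and
// its SearchByVector signature are hypothetical stand-ins for whatever
// secondary index consumes the allow list):
//
//	allow, err := searcher.DocIDs(ctx, filter, additional.Properties{}, "Article")
//	if err != nil {
//		return err
//	}
//	// every candidate the vector index may return must be contained in allow
//	ids, dists, err := vectorIndex.SearchByVector(queryVec, k, allow)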

func (s *Searcher) docIDs(ctx context.Context, filter *filters.LocalFilter,
	additional additional.Properties, className schema.ClassName,
	limit int,
) (helpers.AllowList, error) {
	pv, err := s.extractPropValuePair(filter.Root, className)
	if err != nil {
		return nil, err
	}

	if err := pv.fetchDocIDs(s, limit); err != nil {
		return nil, errors.Wrap(err, "fetch doc ids for prop/value pair")
	}

	dbm, err := pv.mergeDocIDs()
	if err != nil {
		return nil, errors.Wrap(err, "merge doc ids by operator")
	}

	return helpers.NewAllowListFromBitmap(dbm.docIDs), nil
}

func (s *Searcher) extractPropValuePair(filter *filters.Clause,
	className schema.ClassName,
) (*propValuePair, error) {
	class := s.schema.FindClassByName(className)
	if class == nil {
		return nil, fmt.Errorf("class %q not found", className)
	}
	out, err := newPropValuePair(class)
	if err != nil {
		return nil, errors.Wrap(err, "new prop value pair")
	}

	if filter.Operands != nil {
		// nested filter
		children, err := s.extractPropValuePairs(filter.Operands, className)
		if err != nil {
			return nil, err
		}

		out.children = children
		out.operator = filter.Operator
		return out, nil
	}

	if filter.Operator == filters.ContainsAny || filter.Operator == filters.ContainsAll {
		return s.extractContains(filter.On, filter.Value.Type, filter.Value.Value, filter.Operator, class)
	}

	// on value or non-nested filter
	props := filter.On.Slice()
	propName := props[0]

	if s.onInternalProp(propName) {
		return s.extractInternalProp(propName, filter.Value.Type, filter.Value.Value, filter.Operator, class)
	}

	if extractedPropName, ok := schema.IsPropertyLength(propName, 0); ok {
		property, err := s.schema.GetProperty(className, schema.PropertyName(extractedPropName))
		if err != nil {
			return nil, err
		}
		return s.extractPropertyLength(property, filter.Value.Type, filter.Value.Value, filter.Operator, class)
	}

	property, err := s.schema.GetProperty(className, schema.PropertyName(propName))
	if err != nil {
		return nil, err
	}

	if s.onRefProp(property) && len(props) != 1 {
		return s.extractReferenceFilter(property, filter, class)
	}

	if s.onRefProp(property) && filter.Value.Type == schema.DataTypeInt {
		// ref prop and int type is a special case, the user is looking for the
		// reference count as opposed to the content
		return s.extractReferenceCount(property, filter.Value.Value, filter.Operator, class)
	}

	if filter.Operator == filters.OperatorIsNull {
		return s.extractPropertyNull(property, filter.Value.Type, filter.Value.Value, filter.Operator, class)
	}

	if s.onGeoProp(property) {
		return s.extractGeoFilter(property, filter.Value.Value, filter.Value.Type, filter.Operator, class)
	}

	if s.onUUIDProp(property) {
		return s.extractUUIDFilter(property, filter.Value.Value, filter.Value.Type, filter.Operator, class)
	}

	if s.onTokenizableProp(property) {
		return s.extractTokenizableProp(property, filter.Value.Type, filter.Value.Value, filter.Operator, class)
	}

	return s.extractPrimitiveProp(property, filter.Value.Type, filter.Value.Value, filter.Operator, class)
}
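
// For orientation, a nested clause like the following (a hand-built sketch;
// genrePath and yearPath are hypothetical *filters.Path values):
//
//	clause := filters.Clause{
//		Operator: filters.OperatorAnd,
//		Operands: []filters.Clause{
//			{Operator: filters.OperatorEqual, On: genrePath,
//				Value: &filters.Value{Type: schema.DataTypeText, Value: "documentary"}},
//			{Operator: filters.OperatorGreaterThan, On: yearPath,
//				Value: &filters.Value{Type: schema.DataTypeInt, Value: 2000}},
//		},
//	}
//
// becomes a propValuePair with operator And and one child per operand, each
// child holding the byte-encoded value for its property.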

func (s *Searcher) extractPropValuePairs(operands []filters.Clause, className schema.ClassName) ([]*propValuePair, error) {
	children := make([]*propValuePair, len(operands))
	eg := errgroup.Group{}
	// prevent unbounded concurrency, see
	// https://github.com/weaviate/weaviate/issues/3179 for details
	eg.SetLimit(2 * _NUMCPU)

	for i, clause := range operands {
		i, clause := i, clause
		eg.Go(func() error {
			child, err := s.extractPropValuePair(&clause, className)
			if err != nil {
				return errors.Wrapf(err, "nested clause at pos %d", i)
			}
			children[i] = child
			return nil
		})
	}
	if err := eg.Wait(); err != nil {
		return nil, fmt.Errorf("nested query: %w", err)
	}
	return children, nil
}

func (s *Searcher) extractReferenceFilter(prop *models.Property,
	filter *filters.Clause, class *models.Class,
) (*propValuePair, error) {
	ctx := context.TODO()
	return newRefFilterExtractor(s.logger, s.classSearcher, filter, class, prop, s.tenant, s.nestedCrossRefLimit).
		Do(ctx)
}

func (s *Searcher) extractPrimitiveProp(prop *models.Property, propType schema.DataType,
	value interface{}, operator filters.Operator, class *models.Class,
) (*propValuePair, error) {
	var extractValueFn func(in interface{}) ([]byte, error)
	switch propType {
	case schema.DataTypeBoolean:
		extractValueFn = s.extractBoolValue
	case schema.DataTypeInt:
		extractValueFn = s.extractIntValue
	case schema.DataTypeNumber:
		extractValueFn = s.extractNumberValue
	case schema.DataTypeDate:
		extractValueFn = s.extractDateValue
	case "":
		return nil, fmt.Errorf("data type cannot be empty")
	default:
		return nil, fmt.Errorf("data type %q not supported in query", propType)
	}

	byteValue, err := extractValueFn(value)
	if err != nil {
		return nil, err
	}

	hasFilterableIndex := HasFilterableIndex(prop)
	hasSearchableIndex := HasSearchableIndex(prop)

	if !hasFilterableIndex && !hasSearchableIndex {
		return nil, inverted.NewMissingFilterableIndexError(prop.Name)
	}

	return &propValuePair{
		value:              byteValue,
		prop:               prop.Name,
		operator:           operator,
		hasFilterableIndex: hasFilterableIndex,
		hasSearchableIndex: hasSearchableIndex,
		Class:              class,
	}, nil
}

func (s *Searcher) extractReferenceCount(prop *models.Property, value interface{},
	operator filters.Operator, class *models.Class,
) (*propValuePair, error) {
	byteValue, err := s.extractIntCountValue(value)
	if err != nil {
		return nil, err
	}

	hasFilterableIndex := HasFilterableIndexMetaCount && HasInvertedIndex(prop)
	hasSearchableIndex := HasSearchableIndexMetaCount && HasInvertedIndex(prop)

	if !hasFilterableIndex && !hasSearchableIndex {
		return nil, inverted.NewMissingFilterableMetaCountIndexError(prop.Name)
	}

	return &propValuePair{
		value:              byteValue,
		prop:               helpers.MetaCountProp(prop.Name),
		operator:           operator,
		hasFilterableIndex: hasFilterableIndex,
		hasSearchableIndex: hasSearchableIndex,
		Class:              class,
	}, nil
}
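
// A sketch of the special case handled above (hypothetical class/property
// names; the clause asks for the number of references rather than their
// content):
//
//	refCountClause := filters.Clause{
//		Operator: filters.OperatorGreaterThanEqual,
//		On:       &filters.Path{Class: "Article", Property: "writesFor"},
//		Value:    &filters.Value{Type: schema.DataTypeInt, Value: 2},
//	}
//
// It is resolved against helpers.MetaCountProp("writesFor") rather than the
// referenced objects themselves.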

func (s *Searcher) extractGeoFilter(prop *models.Property, value interface{},
	valueType schema.DataType, operator filters.Operator, class *models.Class,
) (*propValuePair, error) {
	if valueType != schema.DataTypeGeoCoordinates {
		return nil, fmt.Errorf("prop %q is of type geoCoordinates, it can only "+
			"be used with geoRange filters", prop.Name)
	}

	parsed, ok := value.(filters.GeoRange)
	if !ok {
		return nil, fmt.Errorf("expected value to be filters.GeoRange, got %T", value)
	}

	return &propValuePair{
		value:              nil, // not going to be served by an inverted index
		valueGeoRange:      &parsed,
		prop:               prop.Name,
		operator:           operator,
		hasFilterableIndex: HasFilterableIndex(prop),
		hasSearchableIndex: HasSearchableIndex(prop),
		Class:              class,
	}, nil
}

func (s *Searcher) extractUUIDFilter(prop *models.Property, value interface{},
	valueType schema.DataType, operator filters.Operator, class *models.Class,
) (*propValuePair, error) {
	var byteValue []byte

	switch valueType {
	case schema.DataTypeText:
		asStr, ok := value.(string)
		if !ok {
			return nil, fmt.Errorf("expected to see uuid as string in filter, got %T", value)
		}
		parsed, err := uuid.Parse(asStr)
		if err != nil {
			return nil, fmt.Errorf("parse uuid string: %w", err)
		}
		byteValue = parsed[:]
	default:
		return nil, fmt.Errorf("prop %q is of type uuid, the uuid to filter "+
			"on must be specified as a string (e.g. valueText:<uuid>)", prop.Name)
	}

	hasFilterableIndex := HasFilterableIndex(prop)
	hasSearchableIndex := HasSearchableIndex(prop)

	if !hasFilterableIndex && !hasSearchableIndex {
		return nil, inverted.NewMissingFilterableIndexError(prop.Name)
	}

	return &propValuePair{
		value:              byteValue,
		prop:               prop.Name,
		operator:           operator,
		hasFilterableIndex: hasFilterableIndex,
		hasSearchableIndex: hasSearchableIndex,
		Class:              class,
	}, nil
}

func (s *Searcher) extractInternalProp(propName string, propType schema.DataType, value interface{},
	operator filters.Operator, class *models.Class,
) (*propValuePair, error) {
	switch propName {
	case filters.InternalPropBackwardsCompatID, filters.InternalPropID:
		return s.extractIDProp(propName, propType, value, operator, class)
	case filters.InternalPropCreationTimeUnix, filters.InternalPropLastUpdateTimeUnix:
		return s.extractTimestampProp(propName, propType, value, operator, class)
	default:
		return nil, fmt.Errorf(
			"failed to extract internal prop, unsupported internal prop '%s'", propName)
	}
}

func (s *Searcher) extractIDProp(propName string, propType schema.DataType,
	value interface{}, operator filters.Operator, class *models.Class,
) (*propValuePair, error) {
	var byteValue []byte

	switch propType {
	case schema.DataTypeText:
		v, ok := value.(string)
		if !ok {
			return nil, fmt.Errorf("expected value to be string, got '%T'", value)
		}
		byteValue = []byte(v)
	default:
		return nil, fmt.Errorf(
			"failed to extract id prop, unsupported type '%s' for prop '%s'", propType, propName)
	}

	return &propValuePair{
		value:              byteValue,
		prop:               filters.InternalPropID,
		operator:           operator,
		hasFilterableIndex: HasFilterableIndexIdProp,
		hasSearchableIndex: HasSearchableIndexIdProp,
		Class:              class,
	}, nil
}

func (s *Searcher) extractTimestampProp(propName string, propType schema.DataType, value interface{},
	operator filters.Operator, class *models.Class,
) (*propValuePair, error) {
	var byteValue []byte

	switch propType {
	case schema.DataTypeText:
		v, ok := value.(string)
		if !ok {
			return nil, fmt.Errorf("expected value to be string, got '%T'", value)
		}
		_, err := strconv.ParseInt(v, 10, 64)
		if err != nil {
			return nil, fmt.Errorf("expected value to be timestamp, got '%s'", v)
		}
		byteValue = []byte(v)
	case schema.DataTypeDate:
		v, ok := value.(string)
		if !ok {
			return nil, fmt.Errorf("expected value to be string, got '%T'", value)
		}
		t, err := time.Parse(time.RFC3339, v)
		if err != nil {
			return nil, errors.Wrap(err, "trying to parse time as RFC3339 string")
		}
		// if propType is a `valueDate`, convert it to unix milliseconds before
		// querying. This is the format in which our timestamps are indexed.
		byteValue = []byte(strconv.FormatInt(t.UnixMilli(), 10))
	default:
		return nil, fmt.Errorf(
			"failed to extract timestamp prop, unsupported type '%s' for prop '%s'", propType, propName)
	}

	return &propValuePair{
		value:              byteValue,
		prop:               propName,
		operator:           operator,
		hasFilterableIndex: HasFilterableIndexTimestampProp, // TODO text_rbm_inverted_index & with settings
		hasSearchableIndex: HasSearchableIndexTimestampProp, // TODO text_rbm_inverted_index & with settings
		Class:              class,
	}, nil
}
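
// For example (a quick numeric check; the timestamp literal is the standard
// Go reference time and is used only for illustration):
//
//	t, _ := time.Parse(time.RFC3339, "2009-11-10T23:00:00Z")
//	_ = strconv.FormatInt(t.UnixMilli(), 10) // "1257894000000"
//
// which matches the representation the inverted index stores for
// creationTimeUnix / lastUpdateTimeUnix.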

func (s *Searcher) extractTokenizableProp(prop *models.Property, propType schema.DataType,
	value interface{}, operator filters.Operator, class *models.Class,
) (*propValuePair, error) {
	var terms []string

	valueString, ok := value.(string)
	if !ok {
		return nil, fmt.Errorf("expected value to be string, got '%T'", value)
	}

	switch propType {
	case schema.DataTypeText:
		// if the operator is like, we cannot apply the regular text-splitting
		// logic as it would remove all wildcard symbols
		if operator == filters.OperatorLike {
			terms = helpers.TokenizeWithWildcards(prop.Tokenization, valueString)
		} else {
			terms = helpers.Tokenize(prop.Tokenization, valueString)
		}
	default:
		return nil, fmt.Errorf("expected value type to be text, got %v", propType)
	}

	hasFilterableIndex := HasFilterableIndex(prop) && !s.isFallbackToSearchable()
	hasSearchableIndex := HasSearchableIndex(prop)

	if !hasFilterableIndex && !hasSearchableIndex {
		return nil, inverted.NewMissingFilterableIndexError(prop.Name)
	}

	propValuePairs := make([]*propValuePair, 0, len(terms))
	for _, term := range terms {
		if s.stopwords.IsStopword(term) {
			continue
		}
		propValuePairs = append(propValuePairs, &propValuePair{
			value:              []byte(term),
			prop:               prop.Name,
			operator:           operator,
			hasFilterableIndex: hasFilterableIndex,
			hasSearchableIndex: hasSearchableIndex,
			Class:              class,
		})
	}

	if len(propValuePairs) > 1 {
		return &propValuePair{operator: filters.OperatorAnd, children: propValuePairs, Class: class}, nil
	}
	if len(propValuePairs) == 1 {
		return propValuePairs[0], nil
	}

	return nil, errors.Errorf("invalid search term, only stopwords provided. Stopwords can be configured in class.invertedIndexConfig.stopwords")
}
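
// A sketch of the resulting structure (hypothetical input; word tokenization
// and an English stopword preset are assumed): filtering a text prop with
// Equal "the fast sports car" tokenizes to ["the", "fast", "sports", "car"],
// drops the stopword "the", and yields
//
//	&propValuePair{operator: filters.OperatorAnd, children: []*propValuePair{fast, sports, car}}
//
// where each child is an Equal match on a single term.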

func (s *Searcher) extractPropertyLength(prop *models.Property, propType schema.DataType,
	value interface{}, operator filters.Operator, class *models.Class,
) (*propValuePair, error) {
	var byteValue []byte

	switch propType {
	case schema.DataTypeInt:
		b, err := s.extractIntValue(value)
		if err != nil {
			return nil, err
		}
		byteValue = b
	default:
		return nil, fmt.Errorf(
			"failed to extract length of prop, unsupported type '%s' for length of prop '%s'", propType, prop.Name)
	}

	return &propValuePair{
		value:              byteValue,
		prop:               helpers.PropLength(prop.Name),
		operator:           operator,
		hasFilterableIndex: HasFilterableIndexPropLength, // TODO text_rbm_inverted_index & with settings
		hasSearchableIndex: HasSearchableIndexPropLength, // TODO text_rbm_inverted_index & with settings
		Class:              class,
	}, nil
}

func (s *Searcher) extractPropertyNull(prop *models.Property, propType schema.DataType,
	value interface{}, operator filters.Operator, class *models.Class,
) (*propValuePair, error) {
	var valResult []byte

	switch propType {
	case schema.DataTypeBoolean:
		b, err := s.extractBoolValue(value)
		if err != nil {
			return nil, err
		}
		valResult = b
	default:
		return nil, fmt.Errorf(
			"failed to extract null prop, unsupported type '%s' for null prop '%s'", propType, prop.Name)
	}

	return &propValuePair{
		value:              valResult,
		prop:               helpers.PropNull(prop.Name),
		operator:           operator,
		hasFilterableIndex: HasFilterableIndexPropNull, // TODO text_rbm_inverted_index & with settings
		hasSearchableIndex: HasSearchableIndexPropNull, // TODO text_rbm_inverted_index & with settings
		Class:              class,
	}, nil
}

func (s *Searcher) extractContains(path *filters.Path, propType schema.DataType, value interface{},
	operator filters.Operator, class *models.Class,
) (*propValuePair, error) {
	var operands []filters.Clause

	switch propType {
	case schema.DataTypeText, schema.DataTypeTextArray:
		valueStringArray, err := s.extractStringArray(value)
		if err != nil {
			return nil, err
		}
		operands = getContainsOperands(propType, path, valueStringArray)
	case schema.DataTypeInt, schema.DataTypeIntArray:
		valueIntArray, err := s.extractIntArray(value)
		if err != nil {
			return nil, err
		}
		operands = getContainsOperands(propType, path, valueIntArray)
	case schema.DataTypeNumber, schema.DataTypeNumberArray:
		valueFloat64Array, err := s.extractFloat64Array(value)
		if err != nil {
			return nil, err
		}
		operands = getContainsOperands(propType, path, valueFloat64Array)
	case schema.DataTypeBoolean, schema.DataTypeBooleanArray:
		valueBooleanArray, err := s.extractBoolArray(value)
		if err != nil {
			return nil, err
		}
		operands = getContainsOperands(propType, path, valueBooleanArray)
	case schema.DataTypeDate, schema.DataTypeDateArray:
		valueDateArray, err := s.extractStringArray(value)
		if err != nil {
			return nil, err
		}
		operands = getContainsOperands(propType, path, valueDateArray)
	default:
		return nil, fmt.Errorf("unsupported type '%s' for '%v' operator", propType, operator)
	}

	children, err := s.extractPropValuePairs(operands, schema.ClassName(class.Class))
	if err != nil {
		return nil, err
	}

	out, err := newPropValuePair(class)
	if err != nil {
		return nil, errors.Wrap(err, "new prop value pair")
	}
	out.children = children
	// ContainsAny maps to Or, ContainsAll to And
	out.operator = filters.OperatorOr
	if operator == filters.ContainsAll {
		out.operator = filters.OperatorAnd
	}
	out.Class = class
	return out, nil
}
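
// A sketch of the rewrite (tagsPath is a hypothetical *filters.Path): a
// `ContainsAny ["go", "rust"]` clause on a text prop is expanded via
// getContainsOperands into one Equal clause per value and merged with
// OperatorOr, while ContainsAll merges the same operands with OperatorAnd:
//
//	operands := getContainsOperands(schema.DataTypeText, tagsPath, []string{"go", "rust"})
//	// operands[0]: Equal "go", operands[1]: Equal "rust"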

// TODO: repeated calls to on... aren't too efficient because we iterate over
// the schema each time, might be smarter to have a single method that
// determines the type and then we switch based on the result. However, the
// effect of that should be very small unless the schema is absolutely massive.
func (s *Searcher) onRefProp(property *models.Property) bool {
	return schema.IsRefDataType(property.DataType)
}

// TODO: repeated calls to on... aren't too efficient because we iterate over
// the schema each time, might be smarter to have a single method that
// determines the type and then we switch based on the result. However, the
// effect of that should be very small unless the schema is absolutely massive.
func (s *Searcher) onGeoProp(prop *models.Property) bool {
	return schema.DataType(prop.DataType[0]) == schema.DataTypeGeoCoordinates
}

// Note: A UUID prop is a user-specified prop of type UUID. This has nothing to
// do with the primary ID of an object which happens to always be a UUID in
// Weaviate v1.
//
// TODO: repeated calls to on... aren't too efficient because we iterate over
// the schema each time, might be smarter to have a single method that
// determines the type and then we switch based on the result. However, the
// effect of that should be very small unless the schema is absolutely massive.
func (s *Searcher) onUUIDProp(prop *models.Property) bool {
	switch dt, _ := schema.AsPrimitive(prop.DataType); dt {
	case schema.DataTypeUUID, schema.DataTypeUUIDArray:
		return true
	default:
		return false
	}
}

func (s *Searcher) onInternalProp(propName string) bool {
	return filters.IsInternalProperty(schema.PropertyName(propName))
}

func (s *Searcher) onTokenizableProp(prop *models.Property) bool {
	switch dt, _ := schema.AsPrimitive(prop.DataType); dt {
	case schema.DataTypeText, schema.DataTypeTextArray:
		return true
	default:
		return false
	}
}

func (s *Searcher) extractStringArray(value interface{}) ([]string, error) {
	switch v := value.(type) {
	case []string:
		return v, nil
	case []interface{}:
		vals := make([]string, len(v))
		for i := range v {
			val, ok := v[i].(string)
			if !ok {
				return nil, fmt.Errorf("value[%d] type should be string but is %T", i, v[i])
			}
			vals[i] = val
		}
		return vals, nil
	default:
		return nil, fmt.Errorf("value type should be []string but is %T", value)
	}
}

func (s *Searcher) extractIntArray(value interface{}) ([]int, error) {
	switch v := value.(type) {
	case []int:
		return v, nil
	case []interface{}:
		vals := make([]int, len(v))
		for i := range v {
			// in this case all number values are unmarshalled into float64, so
			// assert float64 first and then convert to int
			val, ok := v[i].(float64)
			if !ok {
				return nil, fmt.Errorf("value[%d] type should be float64 but is %T", i, v[i])
			}
			vals[i] = int(val)
		}
		return vals, nil
	default:
		return nil, fmt.Errorf("value type should be []int but is %T", value)
	}
}

func (s *Searcher) extractFloat64Array(value interface{}) ([]float64, error) {
	switch v := value.(type) {
	case []float64:
		return v, nil
	case []interface{}:
		vals := make([]float64, len(v))
		for i := range v {
			val, ok := v[i].(float64)
			if !ok {
				return nil, fmt.Errorf("value[%d] type should be float64 but is %T", i, v[i])
			}
			vals[i] = val
		}
		return vals, nil
	default:
		return nil, fmt.Errorf("value type should be []float64 but is %T", value)
	}
}

func (s *Searcher) extractBoolArray(value interface{}) ([]bool, error) {
	switch v := value.(type) {
	case []bool:
		return v, nil
	case []interface{}:
		vals := make([]bool, len(v))
		for i := range v {
			val, ok := v[i].(bool)
			if !ok {
				return nil, fmt.Errorf("value[%d] type should be bool but is %T", i, v[i])
			}
			vals[i] = val
		}
		return vals, nil
	default:
		return nil, fmt.Errorf("value type should be []bool but is %T", value)
	}
}

func getContainsOperands[T any](propType schema.DataType, path *filters.Path, values []T) []filters.Clause {
	operands := make([]filters.Clause, len(values))
	for i := range values {
		operands[i] = filters.Clause{
			Operator: filters.OperatorEqual,
			On:       path,
			Value: &filters.Value{
				Type:  propType,
				Value: values[i],
			},
		}
	}
	return operands
}

type docIDsIterator interface {
	Next() (uint64, bool)
	Len() int
}

type sliceDocIDsIterator struct {
	docIDs []uint64
	pos    int
}

func newSliceDocIDsIterator(docIDs []uint64) docIDsIterator {
	return &sliceDocIDsIterator{docIDs: docIDs, pos: 0}
}

func (it *sliceDocIDsIterator) Next() (uint64, bool) {
	if it.pos >= len(it.docIDs) {
		return 0, false
	}
	pos := it.pos
	it.pos++
	return it.docIDs[pos], true
}

func (it *sliceDocIDsIterator) Len() int {
	return len(it.docIDs)
}

type docBitmap struct {
	docIDs *sroar.Bitmap
}

// newUninitializedDocBitmap can be used whenever we can be sure that the first
// user of the docBitmap will set or replace the bitmap, such as a row reader
func newUninitializedDocBitmap() docBitmap {
	return docBitmap{docIDs: nil}
}

func newDocBitmap() docBitmap {
	return docBitmap{docIDs: sroar.NewBitmap()}
}

func (dbm *docBitmap) count() int {
	if dbm.docIDs == nil {
		return 0
	}
	return dbm.docIDs.GetCardinality()
}

func (dbm *docBitmap) IDs() []uint64 {
	if dbm.docIDs == nil {
		return []uint64{}
	}
	return dbm.docIDs.ToArray()
}

func (dbm *docBitmap) IDsWithLimit(limit int) []uint64 {
	card := dbm.docIDs.GetCardinality()
	if limit >= card {
		return dbm.IDs()
	}

	out := make([]uint64, limit)
	for i := range out {
		// safe to ignore error, it can only error if the index is >= cardinality
		// which we have already ruled out
		out[i], _ = dbm.docIDs.Select(uint64(i))
	}
	return out
}
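
// For example, for a bitmap containing {3, 17, 42}, IDsWithLimit(2) returns
// the two smallest doc IDs, []uint64{3, 17} (relying on Select(i) yielding the
// i-th smallest contained value).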

type docPointerWithScore struct {
	id         uint64
	frequency  float32
	propLength float32
}