KevinStephenson
Adding in weaviate code
b110593
raw
history blame
18.3 kB
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: [email protected]
//
package inverted
import (
"encoding/json"
"fmt"
"time"
"unicode/utf8"
"github.com/go-openapi/strfmt"
"github.com/google/uuid"
"github.com/pkg/errors"
"github.com/weaviate/weaviate/adapters/repos/db/helpers"
"github.com/weaviate/weaviate/entities/filters"
"github.com/weaviate/weaviate/entities/models"
"github.com/weaviate/weaviate/entities/schema"
"github.com/weaviate/weaviate/usecases/objects/validation"
)
func (a *Analyzer) Object(input map[string]any, props []*models.Property,
uuid strfmt.UUID,
) ([]Property, error) {
propsMap := map[string]*models.Property{}
for _, prop := range props {
propsMap[prop.Name] = prop
}
properties, err := a.analyzeProps(propsMap, input)
if err != nil {
return nil, fmt.Errorf("analyze props: %w", err)
}
idProp, err := a.analyzeIDProp(uuid)
if err != nil {
return nil, fmt.Errorf("analyze uuid prop: %w", err)
}
properties = append(properties, *idProp)
tsProps, err := a.analyzeTimestampProps(input)
if err != nil {
return nil, fmt.Errorf("analyze timestamp props: %w", err)
}
// tsProps will be nil here if weaviate is
// not setup to index by timestamps
if tsProps != nil {
properties = append(properties, tsProps...)
}
return properties, nil
}
func (a *Analyzer) analyzeProps(propsMap map[string]*models.Property,
input map[string]any,
) ([]Property, error) {
var out []Property
for key, prop := range propsMap {
if len(prop.DataType) < 1 {
return nil, fmt.Errorf("prop %q has no datatype", prop.Name)
}
if !HasInvertedIndex(prop) {
continue
}
if schema.IsBlobDataType(prop.DataType) {
continue
}
if schema.IsRefDataType(prop.DataType) {
if err := a.extendPropertiesWithReference(&out, prop, input, key); err != nil {
return nil, err
}
} else if schema.IsArrayDataType(prop.DataType) {
if err := a.extendPropertiesWithArrayType(&out, prop, input, key); err != nil {
return nil, err
}
} else {
if err := a.extendPropertiesWithPrimitive(&out, prop, input, key); err != nil {
return nil, err
}
}
}
return out, nil
}
func (a *Analyzer) analyzeIDProp(id strfmt.UUID) (*Property, error) {
value, err := id.MarshalText()
if err != nil {
return nil, fmt.Errorf("marshal id prop: %w", err)
}
return &Property{
Name: filters.InternalPropID,
Items: []Countable{
{
Data: value,
},
},
HasFilterableIndex: HasFilterableIndexIdProp,
HasSearchableIndex: HasSearchableIndexIdProp,
}, nil
}
func (a *Analyzer) analyzeTimestampProps(input map[string]any) ([]Property, error) {
createTime, createTimeOK := input[filters.InternalPropCreationTimeUnix]
updateTime, updateTimeOK := input[filters.InternalPropLastUpdateTimeUnix]
var props []Property
if createTimeOK {
b, err := json.Marshal(createTime)
if err != nil {
return nil, fmt.Errorf("analyze create timestamp prop: %w", err)
}
props = append(props, Property{
Name: filters.InternalPropCreationTimeUnix,
Items: []Countable{{Data: b}},
HasFilterableIndex: HasFilterableIndexTimestampProp,
HasSearchableIndex: HasSearchableIndexTimestampProp,
})
}
if updateTimeOK {
b, err := json.Marshal(updateTime)
if err != nil {
return nil, fmt.Errorf("analyze update timestamp prop: %w", err)
}
props = append(props, Property{
Name: filters.InternalPropLastUpdateTimeUnix,
Items: []Countable{{Data: b}},
HasFilterableIndex: HasFilterableIndexTimestampProp,
HasSearchableIndex: HasSearchableIndexTimestampProp,
})
}
return props, nil
}
func (a *Analyzer) extendPropertiesWithArrayType(properties *[]Property,
prop *models.Property, input map[string]any, propName string,
) error {
value, ok := input[propName]
if !ok {
// skip any primitive prop that's not set
return nil
}
var err error
value, err = typedSliceToUntyped(value)
if err != nil {
return fmt.Errorf("extend properties with array type: %w", err)
}
values, ok := value.([]any)
if !ok {
// skip any primitive prop that's not set
return errors.New("analyze array prop: expected array prop")
}
property, err := a.analyzeArrayProp(prop, values)
if err != nil {
return fmt.Errorf("analyze array prop: %w", err)
}
if property == nil {
return nil
}
*properties = append(*properties, *property)
return nil
}
// extendPropertiesWithPrimitive mutates the passed in properties, by extending
// it with an additional property - if applicable
func (a *Analyzer) extendPropertiesWithPrimitive(properties *[]Property,
prop *models.Property, input map[string]any, propName string,
) error {
var property *Property
var err error
value, ok := input[propName]
if !ok {
// skip any primitive prop that's not set
return nil
}
property, err = a.analyzePrimitiveProp(prop, value)
if err != nil {
return fmt.Errorf("analyze primitive prop: %w", err)
}
if property == nil {
return nil
}
*properties = append(*properties, *property)
return nil
}
func (a *Analyzer) analyzeArrayProp(prop *models.Property, values []any) (*Property, error) {
var items []Countable
hasFilterableIndex := HasFilterableIndex(prop)
hasSearchableIndex := HasSearchableIndex(prop)
switch dt := schema.DataType(prop.DataType[0]); dt {
case schema.DataTypeTextArray:
hasFilterableIndex = hasFilterableIndex && !a.isFallbackToSearchable()
in, err := stringsFromValues(prop, values)
if err != nil {
return nil, err
}
items = a.TextArray(prop.Tokenization, in)
case schema.DataTypeIntArray:
in := make([]int64, len(values))
for i, value := range values {
if asJsonNumber, ok := value.(json.Number); ok {
var err error
value, err = asJsonNumber.Float64()
if err != nil {
return nil, err
}
}
if asFloat, ok := value.(float64); ok {
// unmarshaling from json into a dynamic schema will assume every number
// is a float64
value = int64(asFloat)
}
asInt, ok := value.(int64)
if !ok {
return nil, fmt.Errorf("expected property %s to be of type int64, but got %T", prop.Name, value)
}
in[i] = asInt
}
var err error
items, err = a.IntArray(in)
if err != nil {
return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
}
case schema.DataTypeNumberArray:
in := make([]float64, len(values))
for i, value := range values {
if asJsonNumber, ok := value.(json.Number); ok {
var err error
value, err = asJsonNumber.Float64()
if err != nil {
return nil, err
}
}
asFloat, ok := value.(float64)
if !ok {
return nil, fmt.Errorf("expected property %s to be of type float64, but got %T", prop.Name, value)
}
in[i] = asFloat
}
var err error
items, err = a.FloatArray(in) // convert to int before analyzing
if err != nil {
return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
}
case schema.DataTypeBooleanArray:
in := make([]bool, len(values))
for i, value := range values {
asBool, ok := value.(bool)
if !ok {
return nil, fmt.Errorf("expected property %s to be of type bool, but got %T", prop.Name, value)
}
in[i] = asBool
}
var err error
items, err = a.BoolArray(in) // convert to int before analyzing
if err != nil {
return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
}
case schema.DataTypeDateArray:
in := make([]int64, len(values))
for i, value := range values {
// dates can be either a date-string or directly a time object. Try to parse both
if asTime, okTime := value.(time.Time); okTime {
in[i] = asTime.UnixNano()
} else if asString, okString := value.(string); okString {
parsedTime, err := time.Parse(time.RFC3339Nano, asString)
if err != nil {
return nil, fmt.Errorf("parse time: %w", err)
}
in[i] = parsedTime.UnixNano()
} else {
return nil, fmt.Errorf("expected property %s to be a time-string or time object, but got %T", prop.Name, value)
}
}
var err error
items, err = a.IntArray(in)
if err != nil {
return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
}
case schema.DataTypeUUIDArray:
parsed, err := validation.ParseUUIDArray(values)
if err != nil {
return nil, fmt.Errorf("parse uuid array: %w", err)
}
items, err = a.UUIDArray(parsed)
if err != nil {
return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
}
default:
// ignore unsupported prop type
return nil, nil
}
return &Property{
Name: prop.Name,
Items: items,
Length: len(values),
HasFilterableIndex: hasFilterableIndex,
HasSearchableIndex: hasSearchableIndex,
}, nil
}
func stringsFromValues(prop *models.Property, values []any) ([]string, error) {
in := make([]string, len(values))
for i, value := range values {
asString, ok := value.(string)
if !ok {
return nil, fmt.Errorf("expected property %s to be of type string, but got %T", prop.Name, value)
}
in[i] = asString
}
return in, nil
}
func (a *Analyzer) analyzePrimitiveProp(prop *models.Property, value any) (*Property, error) {
var items []Countable
propertyLength := -1 // will be overwritten for string/text, signals not to add the other types.
hasFilterableIndex := HasFilterableIndex(prop)
hasSearchableIndex := HasSearchableIndex(prop)
switch dt := schema.DataType(prop.DataType[0]); dt {
case schema.DataTypeText:
hasFilterableIndex = hasFilterableIndex && !a.isFallbackToSearchable()
asString, ok := value.(string)
if !ok {
return nil, fmt.Errorf("expected property %s to be of type string, but got %T", prop.Name, value)
}
items = a.Text(prop.Tokenization, asString)
propertyLength = utf8.RuneCountInString(asString)
case schema.DataTypeInt:
if asFloat, ok := value.(float64); ok {
// unmarshaling from json into a dynamic schema will assume every number
// is a float64
value = int64(asFloat)
}
if asInt, ok := value.(int); ok {
// when merging an existing object we may retrieve an untyped int
value = int64(asInt)
}
asInt, ok := value.(int64)
if !ok {
return nil, fmt.Errorf("expected property %s to be of type int64, but got %T", prop.Name, value)
}
var err error
items, err = a.Int(asInt)
if err != nil {
return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
}
case schema.DataTypeNumber:
asFloat, ok := value.(float64)
if !ok {
return nil, fmt.Errorf("expected property %s to be of type float64, but got %T", prop.Name, value)
}
var err error
items, err = a.Float(asFloat) // convert to int before analyzing
if err != nil {
return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
}
case schema.DataTypeBoolean:
asBool, ok := value.(bool)
if !ok {
return nil, fmt.Errorf("expected property %s to be of type bool, but got %T", prop.Name, value)
}
var err error
items, err = a.Bool(asBool) // convert to int before analyzing
if err != nil {
return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
}
case schema.DataTypeDate:
var err error
if asString, ok := value.(string); ok {
// for example when patching the date may have been loaded as a string
value, err = time.Parse(time.RFC3339Nano, asString)
if err != nil {
return nil, fmt.Errorf("parse stringified timestamp: %w", err)
}
}
asTime, ok := value.(time.Time)
if !ok {
return nil, fmt.Errorf("expected property %s to be time.Time, but got %T", prop.Name, value)
}
items, err = a.Int(asTime.UnixNano())
if err != nil {
return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
}
case schema.DataTypeUUID:
var err error
if asString, ok := value.(string); ok {
// for example when patching the uuid may have been loaded as a string
value, err = uuid.Parse(asString)
if err != nil {
return nil, fmt.Errorf("parse stringified uuid: %w", err)
}
}
asUUID, ok := value.(uuid.UUID)
if !ok {
return nil, fmt.Errorf("expected property %s to be uuid.UUID, but got %T", prop.Name, value)
}
items, err = a.UUID(asUUID)
if err != nil {
return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
}
default:
// ignore unsupported prop type
return nil, nil
}
return &Property{
Name: prop.Name,
Items: items,
Length: propertyLength,
HasFilterableIndex: hasFilterableIndex,
HasSearchableIndex: hasSearchableIndex,
}, nil
}
// extendPropertiesWithReference extends the specified properties arrays with
// either 1 or 2 entries: If the ref is not set, only the ref-count property
// will be added. If the ref is set the ref-prop itself will also be added and
// contain all references as values
func (a *Analyzer) extendPropertiesWithReference(properties *[]Property,
prop *models.Property, input map[string]any, propName string,
) error {
value, ok := input[propName]
if !ok {
// explicitly set zero-value, so we can index for "ref not set"
value = make(models.MultipleRef, 0)
}
var asRefs models.MultipleRef
asRefs, ok = value.(models.MultipleRef)
if !ok {
// due to the fix introduced in https://github.com/weaviate/weaviate/pull/2320,
// MultipleRef's can appear as empty []any when no actual refs are provided for
// an object's reference property.
//
// if we encounter []any, assume it indicates an empty ref prop, and skip it.
_, ok := value.([]any)
if !ok {
return fmt.Errorf("expected property %q to be of type models.MutlipleRef,"+
" but got %T", prop.Name, value)
}
return nil
}
property, err := a.analyzeRefPropCount(prop, asRefs)
if err != nil {
return fmt.Errorf("ref count: %w", err)
}
*properties = append(*properties, *property)
if len(asRefs) == 0 {
return nil
}
property, err = a.analyzeRefProp(prop, asRefs)
if err != nil {
return fmt.Errorf("refs: %w", err)
}
*properties = append(*properties, *property)
return nil
}
func (a *Analyzer) analyzeRefPropCount(prop *models.Property,
value models.MultipleRef,
) (*Property, error) {
items, err := a.RefCount(value)
if err != nil {
return nil, fmt.Errorf("analyze ref-property %q: %w", prop.Name, err)
}
return &Property{
Name: helpers.MetaCountProp(prop.Name),
Items: items,
Length: len(value),
HasFilterableIndex: HasFilterableIndex(prop),
HasSearchableIndex: HasSearchableIndex(prop),
}, nil
}
func (a *Analyzer) analyzeRefProp(prop *models.Property,
value models.MultipleRef,
) (*Property, error) {
items, err := a.Ref(value)
if err != nil {
return nil, fmt.Errorf("analyze ref-property %q: %w", prop.Name, err)
}
return &Property{
Name: prop.Name,
Items: items,
HasFilterableIndex: HasFilterableIndex(prop),
HasSearchableIndex: HasSearchableIndex(prop),
}, nil
}
func typedSliceToUntyped(in any) ([]any, error) {
switch typed := in.(type) {
case []any:
// nothing to do
return typed, nil
case []string:
return convertToUntyped[string](typed), nil
case []int:
return convertToUntyped[int](typed), nil
case []time.Time:
return convertToUntyped[time.Time](typed), nil
case []bool:
return convertToUntyped[bool](typed), nil
case []float64:
return convertToUntyped[float64](typed), nil
case []uuid.UUID:
return convertToUntyped[uuid.UUID](typed), nil
default:
return nil, errors.Errorf("unsupported type %T", in)
}
}
func convertToUntyped[T comparable](in []T) []any {
out := make([]any, len(in))
for i := range out {
out[i] = in[i]
}
return out
}
// Indicates whether property should be indexed
// Index holds document ids with property of/containing particular value
// and number of its occurrences in that property
// (index created using bucket of StrategyMapCollection)
func HasSearchableIndex(prop *models.Property) bool {
switch dt, _ := schema.AsPrimitive(prop.DataType); dt {
case schema.DataTypeText, schema.DataTypeTextArray:
// by default property has searchable index only for text/text[] props
if prop.IndexSearchable == nil {
return true
}
return *prop.IndexSearchable
default:
return false
}
}
// Indicates whether property should be indexed
// Index holds document ids with property of/containing particular value
// (index created using bucket of StrategyRoaringSet)
func HasFilterableIndex(prop *models.Property) bool {
// by default property has filterable index
if prop.IndexFilterable == nil {
return true
}
return *prop.IndexFilterable
}
func HasInvertedIndex(prop *models.Property) bool {
return HasFilterableIndex(prop) || HasSearchableIndex(prop)
}
const (
// always
HasFilterableIndexIdProp = true
HasSearchableIndexIdProp = false
// only if index.invertedIndexConfig.IndexTimestamps set
HasFilterableIndexTimestampProp = true
HasSearchableIndexTimestampProp = false
// only if property.indexFilterable or property.indexSearchable set
HasFilterableIndexMetaCount = true
HasSearchableIndexMetaCount = false
// only if index.invertedIndexConfig.IndexNullState set
// and either property.indexFilterable or property.indexSearchable set
HasFilterableIndexPropNull = true
HasSearchableIndexPropNull = false
// only if index.invertedIndexConfig.IndexPropertyLength set
// and either property.indexFilterable or property.indexSearchable set
HasFilterableIndexPropLength = true
HasSearchableIndexPropLength = false
)