Spaces:
Running
Running
// _ _ | |
// __ _____ __ ___ ___ __ _| |_ ___ | |
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ | |
// \ V V / __/ (_| |\ V /| | (_| | || __/ | |
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| | |
// | |
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. | |
// | |
// CONTACT: [email protected] | |
// | |
package vectorizer | |
import ( | |
"context" | |
"fmt" | |
"sort" | |
"strings" | |
"github.com/fatih/camelcase" | |
"github.com/weaviate/weaviate/entities/moduletools" | |
) | |
type ClassSettings interface { | |
PropertyIndexed(property string) bool | |
VectorizePropertyName(propertyName string) bool | |
VectorizeClassName() bool | |
} | |
type ObjectVectorizer struct{} | |
func New() *ObjectVectorizer { | |
return &ObjectVectorizer{} | |
} | |
func (v *ObjectVectorizer) TextsOrVector(ctx context.Context, className string, | |
schema interface{}, objDiff *moduletools.ObjectDiff, | |
icheck ClassSettings, | |
) (string, []float32, error) { | |
vectorize := objDiff == nil || objDiff.GetVec() == nil | |
var corpi []string | |
if icheck.VectorizeClassName() { | |
corpi = append(corpi, v.camelCaseToLower(className)) | |
} | |
if schema != nil { | |
schemamap := schema.(map[string]interface{}) | |
for _, prop := range v.sortStringKeys(schemamap) { | |
if !icheck.PropertyIndexed(prop) { | |
continue | |
} | |
appended := false | |
switch val := schemamap[prop].(type) { | |
case []string: | |
for _, elem := range val { | |
appended = v.appendPropIfText(icheck, &corpi, prop, elem) || appended | |
} | |
case []interface{}: | |
for _, elem := range val { | |
appended = v.appendPropIfText(icheck, &corpi, prop, elem) || appended | |
} | |
default: | |
appended = v.appendPropIfText(icheck, &corpi, prop, val) | |
} | |
vectorize = vectorize || (appended && objDiff != nil && objDiff.IsChangedProp(prop)) | |
} | |
} | |
if len(corpi) == 0 { | |
// fall back to using the class name | |
corpi = append(corpi, v.camelCaseToLower(className)) | |
} | |
// no property was changed, old vector can be used | |
if !vectorize { | |
return "", objDiff.GetVec(), nil | |
} | |
text := strings.Join(corpi, " ") | |
return text, nil, nil | |
} | |
func (v *ObjectVectorizer) sortStringKeys(schemaMap map[string]interface{}) []string { | |
keys := make([]string, 0, len(schemaMap)) | |
for k := range schemaMap { | |
keys = append(keys, k) | |
} | |
sort.Strings(keys) | |
return keys | |
} | |
func (v *ObjectVectorizer) appendPropIfText(icheck ClassSettings, list *[]string, propName string, | |
value interface{}, | |
) bool { | |
valueString, ok := value.(string) | |
if ok { | |
if icheck.VectorizePropertyName(propName) { | |
// use prop and value | |
*list = append(*list, strings.ToLower( | |
fmt.Sprintf("%s %s", v.camelCaseToLower(propName), valueString))) | |
} else { | |
*list = append(*list, strings.ToLower(valueString)) | |
} | |
return true | |
} | |
return false | |
} | |
func (v *ObjectVectorizer) camelCaseToLower(in string) string { | |
parts := camelcase.Split(in) | |
var sb strings.Builder | |
for i, part := range parts { | |
if part == " " { | |
continue | |
} | |
if i > 0 { | |
sb.WriteString(" ") | |
} | |
sb.WriteString(strings.ToLower(part)) | |
} | |
return sb.String() | |
} | |