// _ _ // __ _____ __ ___ ___ __ _| |_ ___ // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ // \ V V / __/ (_| |\ V /| | (_| | || __/ // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| // // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. // // CONTACT: hello@weaviate.io // package vectorizer import ( "context" "github.com/pkg/errors" "github.com/go-openapi/strfmt" "github.com/weaviate/weaviate/entities/models" "github.com/weaviate/weaviate/entities/moduletools" "github.com/weaviate/weaviate/modules/multi2vec-clip/ent" libvectorizer "github.com/weaviate/weaviate/usecases/vectorizer" ) type Vectorizer struct { client Client } func New(client Client) *Vectorizer { return &Vectorizer{ client: client, } } type Client interface { Vectorize(ctx context.Context, texts, images []string) (*ent.VectorizationResult, error) } type ClassSettings interface { ImageField(property string) bool ImageFieldsWeights() ([]float32, error) TextField(property string) bool TextFieldsWeights() ([]float32, error) } func (v *Vectorizer) Object(ctx context.Context, object *models.Object, objDiff *moduletools.ObjectDiff, settings ClassSettings, ) error { vec, err := v.object(ctx, object.ID, object.Properties, objDiff, settings) if err != nil { return err } object.Vector = vec return nil } func (v *Vectorizer) VectorizeImage(ctx context.Context, image string) ([]float32, error) { res, err := v.client.Vectorize(ctx, []string{}, []string{image}) if err != nil { return nil, err } if len(res.ImageVectors) != 1 { return nil, errors.New("empty vector") } return res.ImageVectors[0], nil } func (v *Vectorizer) object(ctx context.Context, id strfmt.UUID, schema interface{}, objDiff *moduletools.ObjectDiff, ichek ClassSettings, ) ([]float32, error) { vectorize := objDiff == nil || objDiff.GetVec() == nil // vectorize image and text texts := []string{} images := []string{} if schema != nil { for prop, value := range schema.(map[string]interface{}) { if ichek.ImageField(prop) { valueString, ok := value.(string) if ok { images = append(images, valueString) vectorize = vectorize || (objDiff != nil && objDiff.IsChangedProp(prop)) } } if ichek.TextField(prop) { valueString, ok := value.(string) if ok { texts = append(texts, valueString) vectorize = vectorize || (objDiff != nil && objDiff.IsChangedProp(prop)) } } valueArr, ok := value.([]interface{}) if ok { for _, value := range valueArr { valueString, ok := value.(string) if ok { texts = append(texts, valueString) vectorize = vectorize || (objDiff != nil && objDiff.IsChangedProp(prop)) } } } } } // no property was changed, old vector can be used if !vectorize { return objDiff.GetVec(), nil } vectors := [][]float32{} if len(texts) > 0 || len(images) > 0 { res, err := v.client.Vectorize(ctx, texts, images) if err != nil { return nil, err } vectors = append(vectors, res.TextVectors...) vectors = append(vectors, res.ImageVectors...) } weights, err := v.getWeights(ichek) if err != nil { return nil, err } return libvectorizer.CombineVectorsWithWeights(vectors, weights), nil } func (v *Vectorizer) getWeights(ichek ClassSettings) ([]float32, error) { weights := []float32{} textFieldsWeights, err := ichek.TextFieldsWeights() if err != nil { return nil, err } imageFieldsWeights, err := ichek.ImageFieldsWeights() if err != nil { return nil, err } weights = append(weights, textFieldsWeights...) weights = append(weights, imageFieldsWeights...) normalizedWeights := v.normalizeWeights(weights) return normalizedWeights, nil } func (v *Vectorizer) normalizeWeights(weights []float32) []float32 { if len(weights) > 0 { var denominator float32 for i := range weights { denominator += weights[i] } normalizer := 1 / denominator normalized := make([]float32, len(weights)) for i := range weights { normalized[i] = weights[i] * normalizer } return normalized } return nil }