KevinStephenson
Adding in weaviate code
b110593
raw
history blame
4.27 kB
//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: hello@weaviate.io
//
package vectorizer
import (
	"context"
	"sort"

	"github.com/go-openapi/strfmt"
	"github.com/pkg/errors"
	"github.com/weaviate/weaviate/entities/models"
	"github.com/weaviate/weaviate/entities/moduletools"
	"github.com/weaviate/weaviate/modules/multi2vec-clip/ent"
	libvectorizer "github.com/weaviate/weaviate/usecases/vectorizer"
)
// Vectorizer produces vectors for objects and images by delegating the
// actual vectorization to a Client.
type Vectorizer struct {
// client performs the text and image vectorization requests.
client Client
}
// New returns a Vectorizer that sends all of its vectorization requests to
// the given client.
func New(client Client) *Vectorizer {
	v := new(Vectorizer)
	v.client = client
	return v
}
// Client is the backend the Vectorizer sends its texts and images to.
type Client interface {
// Vectorize vectorizes the given texts and images in a single call. The
// Vectorizer reads the TextVectors and ImageVectors fields of the result.
Vectorize(ctx context.Context,
texts, images []string) (*ent.VectorizationResult, error)
}
// ClassSettings is the configuration the Vectorizer consults when building an
// object vector: which properties count as image or text fields, and the
// weights used when the resulting vectors are combined.
type ClassSettings interface {
// ImageField reports whether property is to be vectorized as an image.
ImageField(property string) bool
// ImageFieldsWeights returns the weights for the image fields.
ImageFieldsWeights() ([]float32, error)
// TextField reports whether property is to be vectorized as text.
TextField(property string) bool
// TextFieldsWeights returns the weights for the text fields.
TextFieldsWeights() ([]float32, error)
}
// Object computes the vector for object from its ID and properties and stores
// it in object.Vector. objDiff may be nil; settings decides which properties
// take part. When an error is returned, object.Vector is left untouched.
func (v *Vectorizer) Object(ctx context.Context, object *models.Object,
	objDiff *moduletools.ObjectDiff, settings ClassSettings,
) error {
	vector, err := v.object(ctx, object.ID, object.Properties, objDiff, settings)
	if err == nil {
		object.Vector = vector
	}
	return err
}
// VectorizeImage sends a single image (and no texts) to the client and returns
// the resulting image vector. It returns the client's error unchanged, or an
// "empty vector" error when the client does not answer with exactly one image
// vector.
func (v *Vectorizer) VectorizeImage(ctx context.Context, image string) ([]float32, error) {
	noTexts := []string{}
	result, err := v.client.Vectorize(ctx, noTexts, []string{image})
	switch {
	case err != nil:
		return nil, err
	case len(result.ImageVectors) != 1:
		return nil, errors.New("empty vector")
	}
	return result.ImageVectors[0], nil
}
// object builds the vector for a single object from its properties.
//
// A property that the settings mark as an image field contributes its string
// value as an image. A property marked as a text field contributes its string
// value, or every string element of its []interface{} value, as a text.
// Properties that are neither are ignored. Properties are visited in sorted
// name order so that the order of the collected texts and images (and of the
// vectors derived from them) does not depend on Go's randomized map iteration.
//
// When objDiff carries a previous vector and none of the contributing
// properties is reported as changed, that previous vector is returned and the
// client is not called. The id parameter is currently unused.
//
// An error is returned if schema is non-nil but not a map[string]interface{},
// if the client call fails, or if the weights cannot be read from the
// settings.
func (v *Vectorizer) object(ctx context.Context, id strfmt.UUID,
	schema interface{}, objDiff *moduletools.ObjectDiff, ichek ClassSettings,
) ([]float32, error) {
	// Without a previous vector there is nothing to reuse.
	vectorize := objDiff == nil || objDiff.GetVec() == nil

	var props map[string]interface{}
	if schema != nil {
		typed, ok := schema.(map[string]interface{})
		if !ok {
			return nil, errors.Errorf("unexpected properties type %T", schema)
		}
		props = typed
	}

	names := make([]string, 0, len(props))
	for name := range props {
		names = append(names, name)
	}
	sort.Strings(names)

	// markChanged forces re-vectorization once a contributing property is
	// reported as changed; the diff is not consulted again after that.
	markChanged := func(prop string) {
		if !vectorize && objDiff != nil && objDiff.IsChangedProp(prop) {
			vectorize = true
		}
	}

	// Collect the images and texts to vectorize. Both slices stay non-nil so
	// the client receives an empty list rather than nil for an unused kind.
	texts := []string{}
	images := []string{}
	for _, prop := range names {
		value := props[prop]
		if ichek.ImageField(prop) {
			if image, ok := value.(string); ok {
				images = append(images, image)
				markChanged(prop)
			}
		}
		if ichek.TextField(prop) {
			switch typed := value.(type) {
			case string:
				texts = append(texts, typed)
				markChanged(prop)
			case []interface{}:
				// Only array values of text fields are vectorized; arrays
				// of other properties must not leak into the texts.
				for _, elem := range typed {
					if text, ok := elem.(string); ok {
						texts = append(texts, text)
						markChanged(prop)
					}
				}
			}
		}
	}

	// No contributing property was changed, the old vector can be used.
	if !vectorize {
		return objDiff.GetVec(), nil
	}

	vectors := [][]float32{}
	if len(texts) > 0 || len(images) > 0 {
		res, err := v.client.Vectorize(ctx, texts, images)
		if err != nil {
			return nil, err
		}
		vectors = append(vectors, res.TextVectors...)
		vectors = append(vectors, res.ImageVectors...)
	}

	// NOTE(review): the weights are listed text fields first, then image
	// fields, matching the vector order above. Whether
	// CombineVectorsWithWeights tolerates a different number of vectors
	// than weights is not visible here; confirm.
	weights, err := v.getWeights(ichek)
	if err != nil {
		return nil, err
	}
	return libvectorizer.CombineVectorsWithWeights(vectors, weights), nil
}
// getWeights reads the text and image field weights from the settings,
// concatenates them (text weights first, image weights second) and returns
// the normalized result. The first error reported by the settings is returned
// unchanged.
func (v *Vectorizer) getWeights(ichek ClassSettings) ([]float32, error) {
	textWeights, err := ichek.TextFieldsWeights()
	if err != nil {
		return nil, err
	}
	imageWeights, err := ichek.ImageFieldsWeights()
	if err != nil {
		return nil, err
	}

	combined := make([]float32, 0, len(textWeights)+len(imageWeights))
	combined = append(combined, textWeights...)
	combined = append(combined, imageWeights...)
	return v.normalizeWeights(combined), nil
}
// normalizeWeights scales weights so that they sum to 1 and returns the result
// as a new slice; the input slice is not modified.
//
// It returns nil for an empty input. If the weights sum to zero they cannot be
// normalized, so an unscaled copy is returned instead of dividing by zero,
// which would fill the result with NaN or ±Inf values.
func (v *Vectorizer) normalizeWeights(weights []float32) []float32 {
	if len(weights) == 0 {
		return nil
	}

	var sum float32
	for _, w := range weights {
		sum += w
	}

	normalized := make([]float32, len(weights))
	if sum == 0 {
		copy(normalized, weights)
		return normalized
	}

	normalizer := 1 / sum
	for i, w := range weights {
		normalized[i] = w * normalizer
	}
	return normalized
}