Spaces:
Running
Running
//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: hello@weaviate.io
//
package vectorizer | |
import (
	"context"
	"errors"

	"github.com/weaviate/weaviate/entities/models"
	"github.com/weaviate/weaviate/entities/moduletools"
	"github.com/weaviate/weaviate/modules/text2vec-openai/ent"
	objectsvectorizer "github.com/weaviate/weaviate/usecases/modulecomponents/vectorizer"
	libvectorizer "github.com/weaviate/weaviate/usecases/vectorizer"
)
type Vectorizer struct { | |
client Client | |
objectVectorizer *objectsvectorizer.ObjectVectorizer | |
} | |
func New(client Client) *Vectorizer { | |
return &Vectorizer{ | |
client: client, | |
objectVectorizer: objectsvectorizer.New(), | |
} | |
} | |
type Client interface { | |
Vectorize(ctx context.Context, input string, | |
config ent.VectorizationConfig) (*ent.VectorizationResult, error) | |
VectorizeQuery(ctx context.Context, input []string, | |
config ent.VectorizationConfig) (*ent.VectorizationResult, error) | |
} | |
// ClassSettings describes the per-class settings accessors used by this
// package: which properties take part in vectorization, and the string/bool
// values that identify the model and endpoint.
type ClassSettings interface {
	// PropertyIndexed returns whether a property of a class should be indexed.
	PropertyIndexed(property string) bool
	// VectorizePropertyName reports a per-property boolean setting.
	// NOTE(review): presumably whether the property's name is included in the
	// vectorized text — confirm against the implementation.
	VectorizePropertyName(propertyName string) bool
	// VectorizeClassName reports a class-wide boolean setting.
	// NOTE(review): presumably whether the class name is included in the
	// vectorized text — confirm against the implementation.
	VectorizeClassName() bool

	// The accessors below share their names with the ent.VectorizationConfig
	// fields filled in by getVectorizationConfig (Model, Type, ModelVersion,
	// ResourceName, DeploymentID, BaseURL, IsAzure).
	Model() string
	Type() string
	ModelVersion() string
	ResourceName() string
	DeploymentID() string
	BaseURL() string
	IsAzure() bool
}
func (v *Vectorizer) Object(ctx context.Context, object *models.Object, | |
objDiff *moduletools.ObjectDiff, cfg moduletools.ClassConfig, | |
) error { | |
vec, err := v.object(ctx, object.Class, object.Properties, objDiff, cfg) | |
if err != nil { | |
return err | |
} | |
object.Vector = vec | |
return nil | |
} | |
func (v *Vectorizer) object(ctx context.Context, className string, | |
schema interface{}, objDiff *moduletools.ObjectDiff, cfg moduletools.ClassConfig, | |
) ([]float32, error) { | |
text, vector, err := v.objectVectorizer.TextsOrVector(ctx, className, schema, objDiff, NewClassSettings(cfg)) | |
if err != nil { | |
return nil, err | |
} | |
if vector != nil { | |
// dont' re-vectorize | |
return vector, nil | |
} | |
// vectorize text | |
res, err := v.client.Vectorize(ctx, text, v.getVectorizationConfig(cfg)) | |
if err != nil { | |
return nil, err | |
} | |
if len(res.Vector) > 1 { | |
return libvectorizer.CombineVectors(res.Vector), nil | |
} | |
return res.Vector[0], nil | |
} | |
func (v *Vectorizer) getVectorizationConfig(cfg moduletools.ClassConfig) ent.VectorizationConfig { | |
settings := NewClassSettings(cfg) | |
return ent.VectorizationConfig{ | |
Type: settings.Type(), | |
Model: settings.Model(), | |
ModelVersion: settings.ModelVersion(), | |
ResourceName: settings.ResourceName(), | |
DeploymentID: settings.DeploymentID(), | |
BaseURL: settings.BaseURL(), | |
IsAzure: settings.IsAzure(), | |
} | |
} | |