MVPilgrim
restore
6b502ec
raw
history blame
5.47 kB
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: [email protected]
//
package modtransformers
import (
"context"
"net/http"
"os"
"time"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/weaviate/weaviate/entities/models"
"github.com/weaviate/weaviate/entities/modulecapabilities"
"github.com/weaviate/weaviate/entities/moduletools"
"github.com/weaviate/weaviate/modules/text2vec-transformers/clients"
"github.com/weaviate/weaviate/modules/text2vec-transformers/vectorizer"
"github.com/weaviate/weaviate/usecases/modulecomponents/additional"
)
func New() *TransformersModule {
return &TransformersModule{}
}
type TransformersModule struct {
vectorizer textVectorizer
metaProvider metaProvider
graphqlProvider modulecapabilities.GraphQLArguments
searcher modulecapabilities.Searcher
nearTextTransformer modulecapabilities.TextTransform
logger logrus.FieldLogger
additionalPropertiesProvider modulecapabilities.AdditionalProperties
}
type textVectorizer interface {
Object(ctx context.Context, obj *models.Object, objDiff *moduletools.ObjectDiff,
cfg moduletools.ClassConfig) error
Texts(ctx context.Context, input []string,
cfg moduletools.ClassConfig) ([]float32, error)
}
type metaProvider interface {
MetaInfo() (map[string]interface{}, error)
}
func (m *TransformersModule) Name() string {
return "text2vec-transformers"
}
func (m *TransformersModule) Type() modulecapabilities.ModuleType {
return modulecapabilities.Text2Vec
}
func (m *TransformersModule) Init(ctx context.Context,
params moduletools.ModuleInitParams,
) error {
m.logger = params.GetLogger()
if err := m.initVectorizer(ctx, params.GetConfig().ModuleHttpClientTimeout, m.logger); err != nil {
return errors.Wrap(err, "init vectorizer")
}
if err := m.initAdditionalPropertiesProvider(); err != nil {
return errors.Wrap(err, "init additional properties provider")
}
return nil
}
func (m *TransformersModule) InitExtension(modules []modulecapabilities.Module) error {
for _, module := range modules {
if module.Name() == m.Name() {
continue
}
if arg, ok := module.(modulecapabilities.TextTransformers); ok {
if arg != nil && arg.TextTransformers() != nil {
m.nearTextTransformer = arg.TextTransformers()["nearText"]
}
}
}
if err := m.initNearText(); err != nil {
return errors.Wrap(err, "init graphql provider")
}
return nil
}
func (m *TransformersModule) initVectorizer(ctx context.Context, timeout time.Duration,
logger logrus.FieldLogger,
) error {
// TODO: gh-1486 proper config management
uriPassage := os.Getenv("TRANSFORMERS_PASSAGE_INFERENCE_API")
uriQuery := os.Getenv("TRANSFORMERS_QUERY_INFERENCE_API")
uriCommon := os.Getenv("TRANSFORMERS_INFERENCE_API")
if uriCommon == "" {
if uriPassage == "" && uriQuery == "" {
return errors.Errorf("required variable TRANSFORMERS_INFERENCE_API or both variables TRANSFORMERS_PASSAGE_INFERENCE_API and TRANSFORMERS_QUERY_INFERENCE_API are not set")
}
if uriPassage != "" && uriQuery == "" {
return errors.Errorf("required variable TRANSFORMERS_QUERY_INFERENCE_API is not set")
}
if uriPassage == "" && uriQuery != "" {
return errors.Errorf("required variable TRANSFORMERS_PASSAGE_INFERENCE_API is not set")
}
} else {
if uriPassage != "" || uriQuery != "" {
return errors.Errorf("either variable TRANSFORMERS_INFERENCE_API or both variables TRANSFORMERS_PASSAGE_INFERENCE_API and TRANSFORMERS_QUERY_INFERENCE_API should be set")
}
uriPassage = uriCommon
uriQuery = uriCommon
}
client := clients.New(uriPassage, uriQuery, timeout, logger)
if err := client.WaitForStartup(ctx, 1*time.Second); err != nil {
return errors.Wrap(err, "init remote vectorizer")
}
m.vectorizer = vectorizer.New(client)
m.metaProvider = client
return nil
}
func (m *TransformersModule) initAdditionalPropertiesProvider() error {
m.additionalPropertiesProvider = additional.NewText2VecProvider()
return nil
}
func (m *TransformersModule) RootHandler() http.Handler {
// TODO: remove once this is a capability interface
return nil
}
func (m *TransformersModule) VectorizeObject(ctx context.Context,
obj *models.Object, objDiff *moduletools.ObjectDiff, cfg moduletools.ClassConfig,
) error {
return m.vectorizer.Object(ctx, obj, objDiff, cfg)
}
func (m *TransformersModule) MetaInfo() (map[string]interface{}, error) {
return m.metaProvider.MetaInfo()
}
func (m *TransformersModule) AdditionalProperties() map[string]modulecapabilities.AdditionalProperty {
return m.additionalPropertiesProvider.AdditionalProperties()
}
func (m *TransformersModule) VectorizeInput(ctx context.Context,
input string, cfg moduletools.ClassConfig,
) ([]float32, error) {
return m.vectorizer.Texts(ctx, []string{input}, cfg)
}
// verify we implement the modules.Module interface
var (
_ = modulecapabilities.Module(New())
_ = modulecapabilities.Vectorizer(New())
_ = modulecapabilities.MetaProvider(New())
)