//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: hello@weaviate.io
//
package modtransformers | |
import ( | |
"context" | |
"net/http" | |
"os" | |
"time" | |
"github.com/pkg/errors" | |
"github.com/sirupsen/logrus" | |
"github.com/weaviate/weaviate/entities/models" | |
"github.com/weaviate/weaviate/entities/modulecapabilities" | |
"github.com/weaviate/weaviate/entities/moduletools" | |
"github.com/weaviate/weaviate/modules/text2vec-transformers/clients" | |
"github.com/weaviate/weaviate/modules/text2vec-transformers/vectorizer" | |
"github.com/weaviate/weaviate/usecases/modulecomponents/additional" | |
) | |
func New() *TransformersModule { | |
return &TransformersModule{} | |
} | |
type TransformersModule struct { | |
vectorizer textVectorizer | |
metaProvider metaProvider | |
graphqlProvider modulecapabilities.GraphQLArguments | |
searcher modulecapabilities.Searcher | |
nearTextTransformer modulecapabilities.TextTransform | |
logger logrus.FieldLogger | |
additionalPropertiesProvider modulecapabilities.AdditionalProperties | |
} | |
type textVectorizer interface { | |
Object(ctx context.Context, obj *models.Object, objDiff *moduletools.ObjectDiff, | |
cfg moduletools.ClassConfig) error | |
Texts(ctx context.Context, input []string, | |
cfg moduletools.ClassConfig) ([]float32, error) | |
} | |
// metaProvider is the source of the data that the module's MetaInfo method
// hands back to its caller.
type metaProvider interface {
	MetaInfo() (info map[string]interface{}, err error)
}
func (m *TransformersModule) Name() string { | |
return "text2vec-transformers" | |
} | |
func (m *TransformersModule) Type() modulecapabilities.ModuleType { | |
return modulecapabilities.Text2Vec | |
} | |
func (m *TransformersModule) Init(ctx context.Context, | |
params moduletools.ModuleInitParams, | |
) error { | |
m.logger = params.GetLogger() | |
if err := m.initVectorizer(ctx, params.GetConfig().ModuleHttpClientTimeout, m.logger); err != nil { | |
return errors.Wrap(err, "init vectorizer") | |
} | |
if err := m.initAdditionalPropertiesProvider(); err != nil { | |
return errors.Wrap(err, "init additional properties provider") | |
} | |
return nil | |
} | |
func (m *TransformersModule) InitExtension(modules []modulecapabilities.Module) error { | |
for _, module := range modules { | |
if module.Name() == m.Name() { | |
continue | |
} | |
if arg, ok := module.(modulecapabilities.TextTransformers); ok { | |
if arg != nil && arg.TextTransformers() != nil { | |
m.nearTextTransformer = arg.TextTransformers()["nearText"] | |
} | |
} | |
} | |
if err := m.initNearText(); err != nil { | |
return errors.Wrap(err, "init graphql provider") | |
} | |
return nil | |
} | |
func (m *TransformersModule) initVectorizer(ctx context.Context, timeout time.Duration, | |
logger logrus.FieldLogger, | |
) error { | |
// TODO: gh-1486 proper config management | |
uriPassage := os.Getenv("TRANSFORMERS_PASSAGE_INFERENCE_API") | |
uriQuery := os.Getenv("TRANSFORMERS_QUERY_INFERENCE_API") | |
uriCommon := os.Getenv("TRANSFORMERS_INFERENCE_API") | |
if uriCommon == "" { | |
if uriPassage == "" && uriQuery == "" { | |
return errors.Errorf("required variable TRANSFORMERS_INFERENCE_API or both variables TRANSFORMERS_PASSAGE_INFERENCE_API and TRANSFORMERS_QUERY_INFERENCE_API are not set") | |
} | |
if uriPassage != "" && uriQuery == "" { | |
return errors.Errorf("required variable TRANSFORMERS_QUERY_INFERENCE_API is not set") | |
} | |
if uriPassage == "" && uriQuery != "" { | |
return errors.Errorf("required variable TRANSFORMERS_PASSAGE_INFERENCE_API is not set") | |
} | |
} else { | |
if uriPassage != "" || uriQuery != "" { | |
return errors.Errorf("either variable TRANSFORMERS_INFERENCE_API or both variables TRANSFORMERS_PASSAGE_INFERENCE_API and TRANSFORMERS_QUERY_INFERENCE_API should be set") | |
} | |
uriPassage = uriCommon | |
uriQuery = uriCommon | |
} | |
client := clients.New(uriPassage, uriQuery, timeout, logger) | |
if err := client.WaitForStartup(ctx, 1*time.Second); err != nil { | |
return errors.Wrap(err, "init remote vectorizer") | |
} | |
m.vectorizer = vectorizer.New(client) | |
m.metaProvider = client | |
return nil | |
} | |
func (m *TransformersModule) initAdditionalPropertiesProvider() error { | |
m.additionalPropertiesProvider = additional.NewText2VecProvider() | |
return nil | |
} | |
func (m *TransformersModule) RootHandler() http.Handler { | |
// TODO: remove once this is a capability interface | |
return nil | |
} | |
func (m *TransformersModule) VectorizeObject(ctx context.Context, | |
obj *models.Object, objDiff *moduletools.ObjectDiff, cfg moduletools.ClassConfig, | |
) error { | |
return m.vectorizer.Object(ctx, obj, objDiff, cfg) | |
} | |
func (m *TransformersModule) MetaInfo() (map[string]interface{}, error) { | |
return m.metaProvider.MetaInfo() | |
} | |
func (m *TransformersModule) AdditionalProperties() map[string]modulecapabilities.AdditionalProperty { | |
return m.additionalPropertiesProvider.AdditionalProperties() | |
} | |
func (m *TransformersModule) VectorizeInput(ctx context.Context, | |
input string, cfg moduletools.ClassConfig, | |
) ([]float32, error) { | |
return m.vectorizer.Texts(ctx, []string{input}, cfg) | |
} | |
// verify we implement the modules.Module interface | |
var ( | |
_ = modulecapabilities.Module(New()) | |
_ = modulecapabilities.Vectorizer(New()) | |
_ = modulecapabilities.MetaProvider(New()) | |
) | |