Spaces:

MVPilgrim
/

SemanticSearchPOC

Running

SemanticSearchPOC / modules /text2vec-openai /config.go

KevinStephenson

Adding in weaviate code

b110593 over 1 year ago

5.56 kB

	//                           _       _
	// __      _____  __ ___   ___  __ _| |_ ___
	// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
	//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
	//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
	//
	// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
	//
	// CONTACT: hello@weaviate.io
	//

	package modopenai

	import (
	"context"

	"github.com/weaviate/weaviate/entities/models"
	"github.com/weaviate/weaviate/entities/modulecapabilities"
	"github.com/weaviate/weaviate/entities/moduletools"
	"github.com/weaviate/weaviate/entities/schema"
	"github.com/weaviate/weaviate/modules/text2vec-openai/vectorizer"
	)

	// ClassConfigDefaults returns the class-level settings this module applies
	// when the user does not set them explicitly. Every value is taken from the
	// vectorizer package; "modelVersion" is whatever
	// vectorizer.PickDefaultModelVersion selects for the default model and
	// default document type.
	func (m *OpenAIModule) ClassConfigDefaults() map[string]interface{} {
		modelVersion := vectorizer.PickDefaultModelVersion(
			vectorizer.DefaultOpenAIModel,
			vectorizer.DefaultOpenAIDocumentType,
		)

		defaults := make(map[string]interface{}, 5)
		defaults["vectorizeClassName"] = vectorizer.DefaultVectorizeClassName
		defaults["type"] = vectorizer.DefaultOpenAIDocumentType
		defaults["model"] = vectorizer.DefaultOpenAIModel
		defaults["baseURL"] = vectorizer.DefaultBaseURL
		defaults["modelVersion"] = modelVersion
		return defaults
	}

	// PropertyConfigDefaults returns the property-level settings this module
	// applies by default. The dt argument is not consulted: the same defaults are
	// returned regardless of the property's data type.
	func (m *OpenAIModule) PropertyConfigDefaults(
		dt *schema.DataType,
	) map[string]interface{} {
		// "skip" is the negation of the vectorizer's DefaultPropertyIndexed flag.
		skipByDefault := !vectorizer.DefaultPropertyIndexed

		defaults := make(map[string]interface{}, 2)
		defaults["skip"] = skipByDefault
		defaults["vectorizePropertyName"] = vectorizer.DefaultVectorizePropertyName
		return defaults
	}

	// ValidateClass validates class against the module configuration in cfg. It
	// wraps cfg in the vectorizer's class settings and returns the result of
	// their Validate method unchanged. The ctx argument is not used here.
	func (m *OpenAIModule) ValidateClass(ctx context.Context,
		class *models.Class, cfg moduletools.ClassConfig,
	) error {
		classSettings := vectorizer.NewClassSettings(cfg)
		err := classSettings.Validate(class)
		return err
	}

	// Compile-time assertion that the value returned by New() satisfies
	// modulecapabilities.ClassConfigurator. New() is evaluated once, at package
	// initialization, and the result is discarded.
	var _ modulecapabilities.ClassConfigurator = New()

	// type ConfigValidator struct {
	// logger logrus.FieldLogger
	// }

	// type ClassSettings interface {
	// VectorizeClassName() bool
	// VectorizePropertyName(propName string) bool
	// PropertyIndexed(propName string) bool
	// }

	// func NewConfigValidator(logger logrus.FieldLogger) *ConfigValidator {
	// return &ConfigValidator{logger: logger}
	// }

	// func (cv ConfigValidator) Do(ctx context.Context, class models.Class,
	// cfg moduletools.ClassConfig, settings ClassSettings) error {
	// // In text2vec-openai (as opposed to e.g. text2vec-contextionary) the
	// // assumption is that the models will be able to deal with any words, even
	// // previously unseen ones. Therefore we do not need to validate individual
	// // properties, but only the overall "index state"

	// if err := cv.validateIndexState(ctx, class, settings); err != nil {
	// return errors.Errorf("invalid combination of properties")
	// }

	// cv.checkForPossibilityOfDuplicateVectors(ctx, class, settings)

	// return nil
	// }

	// func (cv *ConfigValidator) validateIndexState(ctx context.Context,
	// class *models.Class, settings ClassSettings) error {
	// if settings.VectorizeClassName() {
	// // if the user chooses to vectorize the classname, vector-building will
	// // always be possible, no need to investigate further

	// return nil
	// }

	// // search if there is at least one indexed, string/text prop. If found pass
	// // validation
	// for _, prop := range class.Properties {
	// if len(prop.DataType) < 1 {
	// return errors.Errorf("property %s must have at least one datatype: "+
	// "got %v", prop.Name, prop.DataType)
	// }

	// if prop.DataType[0] != string(schema.DataTypeText) {
	// // we can only vectorize text-like props
	// continue
	// }

	// if settings.PropertyIndexed(prop.Name) {
	// // found at least one, this is a valid schema
	// return nil
	// }
	// }

	// return fmt.Errorf("invalid properties: didn't find a single property which is " +
	// "of type string or text and is not excluded from indexing. In addition the " +
	// "class name is excluded from vectorization as well, meaning that it cannot be " +
	// "used to determine the vector position. To fix this, set 'vectorizeClassName' " +
	// "to true if the class name is contextionary-valid. Alternatively add at least " +
	// "contextionary-valid text/string property which is not excluded from " +
	// "indexing.")
	// }

	// func (cv *ConfigValidator) checkForPossibilityOfDuplicateVectors(
	// ctx context.Context, class *models.Class, settings ClassSettings) {
	// if !settings.VectorizeClassName() {
	// // if the user chooses not to vectorize the class name, this means they must
	// // have chosen something else to vectorize, otherwise the validation would
	// // have error'd before we ever got here. We can skip further checking.

	// return
	// }

	// // search if there is at least one indexed, string/text prop. If found exit
	// for _, prop := range class.Properties {
	// // length check skipped, because validation has already passed
	// if prop.DataType[0] != string(schema.DataTypeText) {
	// // we can only vectorize text-like props
	// continue
	// }

	// if settings.PropertyIndexed(prop.Name) {
	// // found at least one
	// return
	// }
	// }

	// cv.logger.WithField("module", "text2vec-openai").
	// WithField("class", class.Class).
	// Warnf("text2vec-openai: Class %q does not have any properties "+
	// "indexed (or only non text-properties indexed) and the vector position is "+
	// "only determined by the class name. Each object will end up with the same "+
	// "vector which leads to a severe performance penalty on imports. Consider "+
	// "setting vectorIndexConfig.skip=true for this property", class.Class)
	// }