Spaces:
Running
Running
//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: hello@weaviate.io
//
package config | |
import ( | |
"encoding/json" | |
"fmt" | |
"os" | |
"regexp" | |
"time" | |
"github.com/go-openapi/swag" | |
"github.com/pkg/errors" | |
"github.com/sirupsen/logrus" | |
"github.com/weaviate/weaviate/deprecations" | |
"github.com/weaviate/weaviate/entities/replication" | |
"github.com/weaviate/weaviate/entities/schema" | |
"github.com/weaviate/weaviate/entities/vectorindex/common" | |
"github.com/weaviate/weaviate/usecases/cluster" | |
"gopkg.in/yaml.v2" | |
) | |
var (
	// ServerVersion is set when the misc handlers are set up. At that point
	// the entire swagger spec is already being parsed for the server
	// version, so assigning ServerVersion there means the spec only needs to
	// be parsed once.
	ServerVersion string

	// GitHash keeps the git commit hash information. It is "unknown" unless
	// something overwrites it.
	GitHash = "unknown"
)
const (
	// DefaultConfigFile is the config file path used when no config file is
	// provided.
	DefaultConfigFile string = "./weaviate.conf.json"

	// DefaultCleanupIntervalSeconds can be overwritten on a per-class basis.
	DefaultCleanupIntervalSeconds int64 = 60
)
// BM25 tuning parameters. These defaults can be overwritten on a per-class
// basis.
const (
	DefaultBM25k1 float32 = 1.2
	DefaultBM25b  float32 = 0.75
)
// Default values for the import goroutine factor and the disk/memory usage
// thresholds (percentages).
const (
	DefaultMaxImportGoroutinesFactor float64 = 1.5
	DefaultDiskUseWarningPercentage  uint64  = 80
	DefaultDiskUseReadonlyPercentage uint64  = 90
	DefaultMemUseWarningPercentage   uint64  = 80

	// TODO: off by default for now, to make sure the measurement is
	// reliable. Once confirmed, we can set this to 90.
	DefaultMemUseReadonlyPercentage uint64 = 0
)
// Flags holds the command line input options of this package. The struct tag
// exposes ConfigFile as the "config-file" long option.
type Flags struct {
	ConfigFile string `long:"config-file" description:"path to config file (default: ./weaviate.conf.json)"`
}
// Config outline of the config file | |
type Config struct { | |
Name string `json:"name" yaml:"name"` | |
Debug bool `json:"debug" yaml:"debug"` | |
QueryDefaults QueryDefaults `json:"query_defaults" yaml:"query_defaults"` | |
QueryMaximumResults int64 `json:"query_maximum_results" yaml:"query_maximum_results"` | |
QueryNestedCrossReferenceLimit int64 `json:"query_nested_cross_reference_limit" yaml:"query_nested_cross_reference_limit"` | |
Contextionary Contextionary `json:"contextionary" yaml:"contextionary"` | |
Authentication Authentication `json:"authentication" yaml:"authentication"` | |
Authorization Authorization `json:"authorization" yaml:"authorization"` | |
Origin string `json:"origin" yaml:"origin"` | |
Persistence Persistence `json:"persistence" yaml:"persistence"` | |
DefaultVectorizerModule string `json:"default_vectorizer_module" yaml:"default_vectorizer_module"` | |
DefaultVectorDistanceMetric string `json:"default_vector_distance_metric" yaml:"default_vector_distance_metric"` | |
EnableModules string `json:"enable_modules" yaml:"enable_modules"` | |
ModulesPath string `json:"modules_path" yaml:"modules_path"` | |
ModuleHttpClientTimeout time.Duration `json:"modules_client_timeout" yaml:"modules_client_timeout"` | |
AutoSchema AutoSchema `json:"auto_schema" yaml:"auto_schema"` | |
Cluster cluster.Config `json:"cluster" yaml:"cluster"` | |
Replication replication.GlobalConfig `json:"replication" yaml:"replication"` | |
Monitoring Monitoring `json:"monitoring" yaml:"monitoring"` | |
GRPC GRPC `json:"grpc" yaml:"grpc"` | |
Profiling Profiling `json:"profiling" yaml:"profiling"` | |
ResourceUsage ResourceUsage `json:"resource_usage" yaml:"resource_usage"` | |
MaxImportGoroutinesFactor float64 `json:"max_import_goroutine_factor" yaml:"max_import_goroutine_factor"` | |
MaximumConcurrentGetRequests int `json:"maximum_concurrent_get_requests" yaml:"maximum_concurrent_get_requests"` | |
TrackVectorDimensions bool `json:"track_vector_dimensions" yaml:"track_vector_dimensions"` | |
ReindexVectorDimensionsAtStartup bool `json:"reindex_vector_dimensions_at_startup" yaml:"reindex_vector_dimensions_at_startup"` | |
DisableLazyLoadShards bool `json:"disable_lazy_load_shards" yaml:"disable_lazy_load_shards"` | |
RecountPropertiesAtStartup bool `json:"recount_properties_at_startup" yaml:"recount_properties_at_startup"` | |
ReindexSetToRoaringsetAtStartup bool `json:"reindex_set_to_roaringset_at_startup" yaml:"reindex_set_to_roaringset_at_startup"` | |
IndexMissingTextFilterableAtStartup bool `json:"index_missing_text_filterable_at_startup" yaml:"index_missing_text_filterable_at_startup"` | |
DisableGraphQL bool `json:"disable_graphql" yaml:"disable_graphql"` | |
AvoidMmap bool `json:"avoid_mmap" yaml:"avoid_mmap"` | |
CORS CORS `json:"cors" yaml:"cors"` | |
} | |
// moduleProvider is the dependency Config.Validate needs in order to check
// the configured default vectorizer module.
type moduleProvider interface {
	// ValidateVectorizer returns a non-nil error when moduleName is not
	// acceptable as a vectorizer.
	ValidateVectorizer(moduleName string) error
}
// Validate the non-nested parameters. Nested objects must provide their own | |
// validation methods | |
func (c Config) Validate(modProv moduleProvider) error { | |
if err := c.validateDefaultVectorizerModule(modProv); err != nil { | |
return errors.Wrap(err, "default vectorizer module") | |
} | |
if err := c.validateDefaultVectorDistanceMetric(); err != nil { | |
return errors.Wrap(err, "default vector distance metric") | |
} | |
return nil | |
} | |
func (c Config) validateDefaultVectorizerModule(modProv moduleProvider) error { | |
if c.DefaultVectorizerModule == VectorizerModuleNone { | |
return nil | |
} | |
return modProv.ValidateVectorizer(c.DefaultVectorizerModule) | |
} | |
func (c Config) validateDefaultVectorDistanceMetric() error { | |
switch c.DefaultVectorDistanceMetric { | |
case "", common.DistanceCosine, common.DistanceDot, common.DistanceL2Squared, common.DistanceManhattan, common.DistanceHamming: | |
return nil | |
default: | |
return fmt.Errorf("must be one of [\"cosine\", \"dot\", \"l2-squared\", \"manhattan\",\"hamming\"]") | |
} | |
} | |
// AutoSchema holds the auto-schema settings: an on/off switch and the data
// type names to fall back on for string, number and date values.
type AutoSchema struct {
	Enabled       bool   `json:"enabled" yaml:"enabled"`
	DefaultString string `json:"defaultString" yaml:"defaultString"`
	DefaultNumber string `json:"defaultNumber" yaml:"defaultNumber"`
	DefaultDate   string `json:"defaultDate" yaml:"defaultDate"`
}
func (a AutoSchema) Validate() error { | |
if a.DefaultNumber != "int" && a.DefaultNumber != "number" { | |
return fmt.Errorf("autoSchema.defaultNumber must be either 'int' or 'number") | |
} | |
if a.DefaultString != schema.DataTypeText.String() && | |
a.DefaultString != schema.DataTypeString.String() { | |
return fmt.Errorf("autoSchema.defaultString must be either 'string' or 'text") | |
} | |
if a.DefaultDate != "date" && | |
a.DefaultDate != schema.DataTypeText.String() && | |
a.DefaultDate != schema.DataTypeString.String() { | |
return fmt.Errorf("autoSchema.defaultDate must be either 'date' or 'string' or 'text") | |
} | |
return nil | |
} | |
// QueryDefaults holds the defaults applied to optional query parameters.
type QueryDefaults struct {
	Limit int64 `json:"limit" yaml:"limit"`
}
// Contextionary is the "contextionary" section of the config; it carries a
// single URL.
type Contextionary struct {
	URL string `json:"url" yaml:"url"`
}
// Monitoring is the "monitoring" section of the config. Note that the Group
// field is stored under the "group_classes" key.
type Monitoring struct {
	Enabled bool   `json:"enabled" yaml:"enabled"`
	Tool    string `json:"tool" yaml:"tool"`
	Port    int    `json:"port" yaml:"port"`
	Group   bool   `json:"group_classes" yaml:"group_classes"`
}
// GRPC is the "grpc" section of the config. It has its own certificate and
// key file settings so that gRPC can use independent TLS credentials.
type GRPC struct {
	Port     int    `json:"port" yaml:"port"`
	CertFile string `json:"certFile" yaml:"certFile"`
	KeyFile  string `json:"keyFile" yaml:"keyFile"`
}
// Profiling is the "profiling" section of the config: a block profile rate
// and a mutex profile fraction.
type Profiling struct {
	BlockProfileRate     int `json:"blockProfileRate" yaml:"blockProfileRate"`
	MutexProfileFraction int `json:"mutexProfileFraction" yaml:"mutexProfileFraction"`
}
// Persistence is the "persistence" section of the config: the data path plus
// several memtable-related integer settings.
type Persistence struct {
	DataPath                          string `json:"dataPath" yaml:"dataPath"`
	FlushIdleMemtablesAfter           int    `json:"flushIdleMemtablesAfter" yaml:"flushIdleMemtablesAfter"`
	MemtablesMaxSizeMB                int    `json:"memtablesMaxSizeMB" yaml:"memtablesMaxSizeMB"`
	MemtablesMinActiveDurationSeconds int    `json:"memtablesMinActiveDurationSeconds" yaml:"memtablesMinActiveDurationSeconds"`
	MemtablesMaxActiveDurationSeconds int    `json:"memtablesMaxActiveDurationSeconds" yaml:"memtablesMaxActiveDurationSeconds"`
}

// Validate reports an error when no data path is configured. None of the
// other fields are checked.
func (p Persistence) Validate() error {
	if p.DataPath != "" {
		return nil
	}
	return fmt.Errorf("persistence.dataPath must be set")
}
// DiskUse holds the disk usage thresholds, expressed as percentages.
type DiskUse struct {
	WarningPercentage  uint64 `json:"warning_percentage" yaml:"warning_percentage"`
	ReadOnlyPercentage uint64 `json:"readonly_percentage" yaml:"readonly_percentage"`
}

// Validate ensures both thresholds are valid percentages (at most 100; the
// unsigned type already rules out negative values). The returned error names
// the offending field.
func (d DiskUse) Validate() error {
	if d.WarningPercentage > 100 {
		// This check used to report read_only_percentage, pointing users at
		// the wrong setting.
		return fmt.Errorf("disk_use.warning_percentage must be between 0 and 100")
	}
	if d.ReadOnlyPercentage > 100 {
		return fmt.Errorf("disk_use.read_only_percentage must be between 0 and 100")
	}
	return nil
}
// MemUse holds the memory usage thresholds, expressed as percentages.
type MemUse struct {
	WarningPercentage  uint64 `json:"warning_percentage" yaml:"warning_percentage"`
	ReadOnlyPercentage uint64 `json:"readonly_percentage" yaml:"readonly_percentage"`
}

// Validate ensures both thresholds are valid percentages (at most 100; the
// unsigned type already rules out negative values). The returned error names
// the offending field.
func (m MemUse) Validate() error {
	if m.WarningPercentage > 100 {
		// This check used to report read_only_percentage, pointing users at
		// the wrong setting.
		return fmt.Errorf("mem_use.warning_percentage must be between 0 and 100")
	}
	if m.ReadOnlyPercentage > 100 {
		return fmt.Errorf("mem_use.read_only_percentage must be between 0 and 100")
	}
	return nil
}
type ResourceUsage struct { | |
DiskUse DiskUse | |
MemUse MemUse | |
} | |
// CORS is the "cors" section of the config: the allowed origin, methods and
// headers, each kept as a plain string.
type CORS struct {
	AllowOrigin  string `json:"allow_origin" yaml:"allow_origin"`
	AllowMethods string `json:"allow_methods" yaml:"allow_methods"`
	AllowHeaders string `json:"allow_headers" yaml:"allow_headers"`
}
// Default values for the CORS settings.
const (
	DefaultCORSAllowOrigin  = "*"
	DefaultCORSAllowMethods = "*"

	// DefaultCORSAllowHeaders is one comma-separated header list. It is
	// written as a constant concatenation purely for readability; the
	// resulting value is a single string.
	DefaultCORSAllowHeaders = "Content-Type, Authorization, Batch, " +
		"X-Openai-Api-Key, X-Openai-Organization, X-Openai-Baseurl, " +
		"X-Anyscale-Baseurl, X-Anyscale-Api-Key, " +
		"X-Cohere-Api-Key, X-Cohere-Baseurl, " +
		"X-Huggingface-Api-Key, X-Azure-Api-Key, X-Palm-Api-Key, X-Jinaai-Api-Key, " +
		"X-Aws-Access-Key, X-Aws-Secret-Key"
)
func (r ResourceUsage) Validate() error { | |
if err := r.DiskUse.Validate(); err != nil { | |
return err | |
} | |
if err := r.MemUse.Validate(); err != nil { | |
return err | |
} | |
return nil | |
} | |
// GetConfigOptionGroup creates an option group for swagger | |
func GetConfigOptionGroup() *swag.CommandLineOptionsGroup { | |
commandLineOptionsGroup := swag.CommandLineOptionsGroup{ | |
ShortDescription: "Connector config & MQTT config", | |
LongDescription: "", | |
Options: &Flags{}, | |
} | |
return &commandLineOptionsGroup | |
} | |
// WeaviateConfig represents the used schema's | |
type WeaviateConfig struct { | |
Config Config | |
Hostname string | |
Scheme string | |
} | |
// GetHostAddress from config locations | |
func (f *WeaviateConfig) GetHostAddress() string { | |
return fmt.Sprintf("%s://%s", f.Scheme, f.Hostname) | |
} | |
// LoadConfig from config locations | |
func (f *WeaviateConfig) LoadConfig(flags *swag.CommandLineOptionsGroup, logger logrus.FieldLogger) error { | |
// Get command line flags | |
configFileName := flags.Options.(*Flags).ConfigFile | |
// Set default if not given | |
if configFileName == "" { | |
configFileName = DefaultConfigFile | |
} | |
// Read config file | |
file, err := os.ReadFile(configFileName) | |
_ = err // explicitly ignore | |
if len(file) > 0 { | |
logger.WithField("action", "config_load").WithField("config_file_path", configFileName). | |
Info("Usage of the weaviate.conf.json file is deprecated and will be removed in the future. Please use environment variables.") | |
config, err := f.parseConfigFile(file, configFileName) | |
if err != nil { | |
return configErr(err) | |
} | |
f.Config = config | |
deprecations.Log(logger, "config-files") | |
} | |
if err := FromEnv(&f.Config); err != nil { | |
return configErr(err) | |
} | |
if err := f.Config.Authentication.Validate(); err != nil { | |
return configErr(err) | |
} | |
if err := f.Config.Authorization.Validate(); err != nil { | |
return configErr(err) | |
} | |
if err := f.Config.Persistence.Validate(); err != nil { | |
return configErr(err) | |
} | |
if err := f.Config.AutoSchema.Validate(); err != nil { | |
return configErr(err) | |
} | |
if err := f.Config.ResourceUsage.Validate(); err != nil { | |
return configErr(err) | |
} | |
return nil | |
} | |
func (f *WeaviateConfig) parseConfigFile(file []byte, name string) (Config, error) { | |
var config Config | |
m := regexp.MustCompile(`.*\.(\w+)$`).FindStringSubmatch(name) | |
if len(m) < 2 { | |
return config, fmt.Errorf("config file does not have a file ending, got '%s'", name) | |
} | |
switch m[1] { | |
case "json": | |
err := json.Unmarshal(file, &config) | |
if err != nil { | |
return config, fmt.Errorf("error unmarshalling the json config file: %s", err) | |
} | |
case "yaml": | |
err := yaml.Unmarshal(file, &config) | |
if err != nil { | |
return config, fmt.Errorf("error unmarshalling the yaml config file: %s", err) | |
} | |
default: | |
return config, fmt.Errorf("unsupported config file extension '%s', use .yaml or .json", m[1]) | |
} | |
return config, nil | |
} | |
// configErr prefixes err with "invalid config: ". The error is wrapped with
// %w, so the message is unchanged while the original error stays reachable
// through errors.Is, errors.As and errors.Unwrap. err is expected to be
// non-nil.
func configErr(err error) error {
	return fmt.Errorf("invalid config: %w", err)
}