SemanticSearchPOC / usecases /config /config_handler.go
KevinStephenson
Adding in weaviate code
b110593
raw
history blame
14.3 kB
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: hello@weaviate.io
//
package config
import (
"encoding/json"
"fmt"
"os"
"regexp"
"time"
"github.com/go-openapi/swag"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/weaviate/weaviate/deprecations"
"github.com/weaviate/weaviate/entities/replication"
"github.com/weaviate/weaviate/entities/schema"
"github.com/weaviate/weaviate/entities/vectorindex/common"
"github.com/weaviate/weaviate/usecases/cluster"
"gopkg.in/yaml.v2"
)
// ServerVersion is set when the misc handlers are set up.
// When the misc handlers are set up, the entire swagger spec
// is already being parsed for the server version. This is
// a good time for us to set ServerVersion, so that the
// spec only needs to be parsed once.
// Its zero value is the empty string.
var ServerVersion string
// GitHash keeps the git commit hash. It is initialized to "unknown";
// presumably it is overridden at build time (TODO confirm).
var GitHash = "unknown"
// DefaultConfigFile is the config file path that is used when no config file
// is provided through the command-line flags.
const DefaultConfigFile string = "./weaviate.conf.json"
// DefaultCleanupIntervalSeconds is the default cleanup interval in seconds.
// It can be overwritten on a per-class basis.
const DefaultCleanupIntervalSeconds = int64(60)
const (
	// Default BM25 tuning params (k1 and b). These can be overwritten on a
	// per-class basis.
	DefaultBM25k1 = float32(1.2)
	DefaultBM25b = float32(0.75)
)
// Defaults for the import goroutine factor and for the disk/memory usage
// thresholds. The thresholds are percentages.
const (
	DefaultMaxImportGoroutinesFactor = float64(1.5)
	DefaultDiskUseWarningPercentage = uint64(80)
	DefaultDiskUseReadonlyPercentage = uint64(90)
	DefaultMemUseWarningPercentage = uint64(80)
	// TODO: off by default for now (hence the value 0), to make sure
	// the measurement is reliable. once
	// confirmed, we can set this to 90
	DefaultMemUseReadonlyPercentage = uint64(0)
)
// Flags are the command-line input options of this package.
//
// ConfigFile is the path to the config file (flag "config-file"). LoadConfig
// falls back to DefaultConfigFile when it is empty.
type Flags struct {
	ConfigFile string `long:"config-file" description:"path to config file (default: ./weaviate.conf.json)"`
}
// Config is the outline of the config file. Every field is decoded from the
// JSON/YAML key given in its struct tag; LoadConfig additionally passes the
// struct to FromEnv.
//
// Config.Validate only checks the non-nested DefaultVectorizerModule and
// DefaultVectorDistanceMetric fields. Nested sections bring their own
// Validate methods.
type Config struct {
	Name string `json:"name" yaml:"name"`
	Debug bool `json:"debug" yaml:"debug"`
	QueryDefaults QueryDefaults `json:"query_defaults" yaml:"query_defaults"`
	QueryMaximumResults int64 `json:"query_maximum_results" yaml:"query_maximum_results"`
	QueryNestedCrossReferenceLimit int64 `json:"query_nested_cross_reference_limit" yaml:"query_nested_cross_reference_limit"`
	Contextionary Contextionary `json:"contextionary" yaml:"contextionary"`
	Authentication Authentication `json:"authentication" yaml:"authentication"`
	Authorization Authorization `json:"authorization" yaml:"authorization"`
	Origin string `json:"origin" yaml:"origin"`
	Persistence Persistence `json:"persistence" yaml:"persistence"`
	// Checked by Validate through the module provider unless it equals
	// VectorizerModuleNone.
	DefaultVectorizerModule string `json:"default_vectorizer_module" yaml:"default_vectorizer_module"`
	// Checked by Validate: must be empty or one of the distance constants of
	// the vectorindex/common package.
	DefaultVectorDistanceMetric string `json:"default_vector_distance_metric" yaml:"default_vector_distance_metric"`
	EnableModules string `json:"enable_modules" yaml:"enable_modules"`
	ModulesPath string `json:"modules_path" yaml:"modules_path"`
	// Note that the key is "modules_client_timeout", not derived from the
	// field name.
	// NOTE(review): a time.Duration decodes from a plain number as
	// nanoseconds — confirm config files use the intended unit.
	ModuleHttpClientTimeout time.Duration `json:"modules_client_timeout" yaml:"modules_client_timeout"`
	AutoSchema AutoSchema `json:"auto_schema" yaml:"auto_schema"`
	Cluster cluster.Config `json:"cluster" yaml:"cluster"`
	Replication replication.GlobalConfig `json:"replication" yaml:"replication"`
	Monitoring Monitoring `json:"monitoring" yaml:"monitoring"`
	GRPC GRPC `json:"grpc" yaml:"grpc"`
	Profiling Profiling `json:"profiling" yaml:"profiling"`
	ResourceUsage ResourceUsage `json:"resource_usage" yaml:"resource_usage"`
	// Note that the key uses the singular "goroutine".
	MaxImportGoroutinesFactor float64 `json:"max_import_goroutine_factor" yaml:"max_import_goroutine_factor"`
	MaximumConcurrentGetRequests int `json:"maximum_concurrent_get_requests" yaml:"maximum_concurrent_get_requests"`
	TrackVectorDimensions bool `json:"track_vector_dimensions" yaml:"track_vector_dimensions"`
	ReindexVectorDimensionsAtStartup bool `json:"reindex_vector_dimensions_at_startup" yaml:"reindex_vector_dimensions_at_startup"`
	DisableLazyLoadShards bool `json:"disable_lazy_load_shards" yaml:"disable_lazy_load_shards"`
	RecountPropertiesAtStartup bool `json:"recount_properties_at_startup" yaml:"recount_properties_at_startup"`
	ReindexSetToRoaringsetAtStartup bool `json:"reindex_set_to_roaringset_at_startup" yaml:"reindex_set_to_roaringset_at_startup"`
	IndexMissingTextFilterableAtStartup bool `json:"index_missing_text_filterable_at_startup" yaml:"index_missing_text_filterable_at_startup"`
	DisableGraphQL bool `json:"disable_graphql" yaml:"disable_graphql"`
	AvoidMmap bool `json:"avoid_mmap" yaml:"avoid_mmap"`
	CORS CORS `json:"cors" yaml:"cors"`
}
// moduleProvider is the dependency Config.Validate needs in order to check
// the configured default vectorizer module. ValidateVectorizer returns a
// non-nil error when moduleName is not acceptable.
type moduleProvider interface {
	ValidateVectorizer(moduleName string) error
}
// Validate checks the top-level (non-nested) settings of the config: first
// the default vectorizer module, then the default vector distance metric.
// The first failure is returned, wrapped with the name of the offending
// setting. Nested objects are expected to provide their own validation
// methods.
func (c Config) Validate(modProv moduleProvider) error {
	err := c.validateDefaultVectorizerModule(modProv)
	if err != nil {
		return errors.Wrap(err, "default vectorizer module")
	}

	err = c.validateDefaultVectorDistanceMetric()
	if err != nil {
		return errors.Wrap(err, "default vector distance metric")
	}

	return nil
}
// validateDefaultVectorizerModule asks the module provider whether the
// configured default vectorizer module is valid. The provider is not
// consulted when the module is VectorizerModuleNone.
func (c Config) validateDefaultVectorizerModule(modProv moduleProvider) error {
	if c.DefaultVectorizerModule != VectorizerModuleNone {
		return modProv.ValidateVectorizer(c.DefaultVectorizerModule)
	}

	return nil
}
// validateDefaultVectorDistanceMetric accepts an unset (empty) metric as well
// as any of the distance metrics declared in the vectorindex/common package.
// Every other value yields an error that lists the valid names.
func (c Config) validateDefaultVectorDistanceMetric() error {
	accepted := [...]string{
		"", // not configured
		common.DistanceCosine,
		common.DistanceDot,
		common.DistanceL2Squared,
		common.DistanceManhattan,
		common.DistanceHamming,
	}

	for _, metric := range accepted {
		if c.DefaultVectorDistanceMetric == metric {
			return nil
		}
	}

	return fmt.Errorf("must be one of [\"cosine\", \"dot\", \"l2-squared\", \"manhattan\",\"hamming\"]")
}
// AutoSchema is the auto_schema section of the config.
//
// The three Default* fields hold data type names. AutoSchema.Validate
// restricts them: DefaultNumber must be "int" or "number", DefaultString must
// be the text or string data type of the schema package, and DefaultDate must
// be "date" or one of those two.
type AutoSchema struct {
	Enabled bool `json:"enabled" yaml:"enabled"`
	DefaultString string `json:"defaultString" yaml:"defaultString"`
	DefaultNumber string `json:"defaultNumber" yaml:"defaultNumber"`
	DefaultDate string `json:"defaultDate" yaml:"defaultDate"`
}
// Validate checks that the default data types of the auto schema hold
// supported values:
//
//   - DefaultNumber must be "int" or "number",
//   - DefaultString must be the text or the string data type,
//   - DefaultDate must be "date", or the text or the string data type.
//
// The checks run in that order and the first violation is returned. The
// Enabled flag is not taken into account.
func (a AutoSchema) Validate() error {
	textType := schema.DataTypeText.String()
	stringType := schema.DataTypeString.String()

	switch a.DefaultNumber {
	case "int", "number":
	default:
		return fmt.Errorf("autoSchema.defaultNumber must be either 'int' or 'number'")
	}

	switch a.DefaultString {
	case textType, stringType:
	default:
		return fmt.Errorf("autoSchema.defaultString must be either 'string' or 'text'")
	}

	switch a.DefaultDate {
	case "date", textType, stringType:
	default:
		return fmt.Errorf("autoSchema.defaultDate must be either 'date' or 'string' or 'text'")
	}

	return nil
}
// QueryDefaults holds the defaults for optional query parameters. It is the
// query_defaults section of the config; Limit is read from the "limit" key.
type QueryDefaults struct {
	Limit int64 `json:"limit" yaml:"limit"`
}
// Contextionary is the contextionary section of the config. It only carries
// the URL setting, read from the "url" key.
type Contextionary struct {
	URL string `json:"url" yaml:"url"`
}
// Monitoring is the monitoring section of the config.
type Monitoring struct {
	Enabled bool `json:"enabled" yaml:"enabled"`
	Tool string `json:"tool" yaml:"tool"`
	Port int `json:"port" yaml:"port"`
	// Group is read from the "group_classes" key, not from "group".
	Group bool `json:"group_classes" yaml:"group_classes"`
}
// GRPC is the grpc section of the config. Besides the port it carries its own
// certificate and key file settings, to support independent TLS credentials
// for gRPC.
type GRPC struct {
	Port int `json:"port" yaml:"port"`
	CertFile string `json:"certFile" yaml:"certFile"`
	KeyFile string `json:"keyFile" yaml:"keyFile"`
}
// Profiling is the profiling section of the config.
// NOTE(review): the field names suggest these values are handed to
// runtime.SetBlockProfileRate and runtime.SetMutexProfileFraction — confirm
// at the place where they are consumed.
type Profiling struct {
	BlockProfileRate int `json:"blockProfileRate" yaml:"blockProfileRate"`
	MutexProfileFraction int `json:"mutexProfileFraction" yaml:"mutexProfileFraction"`
}
// Persistence is the persistence section of the config. DataPath is the only
// setting that Validate enforces.
type Persistence struct {
	DataPath                          string `json:"dataPath" yaml:"dataPath"`
	FlushIdleMemtablesAfter           int    `json:"flushIdleMemtablesAfter" yaml:"flushIdleMemtablesAfter"`
	MemtablesMaxSizeMB                int    `json:"memtablesMaxSizeMB" yaml:"memtablesMaxSizeMB"`
	MemtablesMinActiveDurationSeconds int    `json:"memtablesMinActiveDurationSeconds" yaml:"memtablesMinActiveDurationSeconds"`
	MemtablesMaxActiveDurationSeconds int    `json:"memtablesMaxActiveDurationSeconds" yaml:"memtablesMaxActiveDurationSeconds"`
}

// Validate reports an error when no data path is configured. No other field
// is checked.
func (p Persistence) Validate() error {
	if p.DataPath != "" {
		return nil
	}

	return fmt.Errorf("persistence.dataPath must be set")
}
// DiskUse holds the disk usage thresholds as percentages. Both values must
// not exceed 100, see Validate.
type DiskUse struct {
	WarningPercentage  uint64 `json:"warning_percentage" yaml:"warning_percentage"`
	ReadOnlyPercentage uint64 `json:"readonly_percentage" yaml:"readonly_percentage"`
}

// Validate makes sure that both thresholds are valid percentages, i.e. not
// larger than 100 (the fields are unsigned, so there is no lower bound to
// check). The returned error names the offending setting; the warning
// threshold is checked first.
func (d DiskUse) Validate() error {
	if d.WarningPercentage > 100 {
		return fmt.Errorf("disk_use.warning_percentage must be between 0 and 100")
	}

	if d.ReadOnlyPercentage > 100 {
		return fmt.Errorf("disk_use.read_only_percentage must be between 0 and 100")
	}

	return nil
}
// MemUse holds the memory usage thresholds as percentages. Both values must
// not exceed 100, see Validate.
type MemUse struct {
	WarningPercentage  uint64 `json:"warning_percentage" yaml:"warning_percentage"`
	ReadOnlyPercentage uint64 `json:"readonly_percentage" yaml:"readonly_percentage"`
}

// Validate makes sure that both thresholds are valid percentages, i.e. not
// larger than 100 (the fields are unsigned, so there is no lower bound to
// check). The returned error names the offending setting; the warning
// threshold is checked first.
func (m MemUse) Validate() error {
	if m.WarningPercentage > 100 {
		return fmt.Errorf("mem_use.warning_percentage must be between 0 and 100")
	}

	if m.ReadOnlyPercentage > 100 {
		return fmt.Errorf("mem_use.read_only_percentage must be between 0 and 100")
	}

	return nil
}
// ResourceUsage groups the disk and memory usage thresholds.
//
// The fields carry no json/yaml struct tags, so the decoders fall back to
// their default field-name mapping for the keys inside this section.
type ResourceUsage struct {
	DiskUse DiskUse
	MemUse MemUse
}
// CORS is the cors section of the config. Each field is a plain string read
// from the key in its tag; the DefaultCORS* constants of this package provide
// the corresponding default values.
type CORS struct {
	AllowOrigin string `json:"allow_origin" yaml:"allow_origin"`
	AllowMethods string `json:"allow_methods" yaml:"allow_methods"`
	AllowHeaders string `json:"allow_headers" yaml:"allow_headers"`
}
// Default values for the CORS settings: any origin, any method and a fixed,
// comma-separated list of allowed headers.
const (
	DefaultCORSAllowOrigin = "*"
	DefaultCORSAllowMethods = "*"
	DefaultCORSAllowHeaders = "Content-Type, Authorization, Batch, X-Openai-Api-Key, X-Openai-Organization, X-Openai-Baseurl, X-Anyscale-Baseurl, X-Anyscale-Api-Key, X-Cohere-Api-Key, X-Cohere-Baseurl, X-Huggingface-Api-Key, X-Azure-Api-Key, X-Palm-Api-Key, X-Jinaai-Api-Key, X-Aws-Access-Key, X-Aws-Secret-Key"
)
// Validate runs the validation of the disk usage settings and, only if those
// are fine, the validation of the memory usage settings. The first error is
// returned unchanged.
func (r ResourceUsage) Validate() error {
	err := r.DiskUse.Validate()
	if err == nil {
		err = r.MemUse.Validate()
	}

	return err
}
// GetConfigOptionGroup returns a new command line options group for swagger.
// Its Options hold an empty *Flags value, which is the type LoadConfig reads
// the config file path from.
func GetConfigOptionGroup() *swag.CommandLineOptionsGroup {
	return &swag.CommandLineOptionsGroup{
		ShortDescription: "Connector config & MQTT config",
		LongDescription:  "",
		Options:          &Flags{},
	}
}
// WeaviateConfig bundles the Config, which is filled by LoadConfig, with the
// Hostname and Scheme that GetHostAddress combines into the host address.
type WeaviateConfig struct {
	Config Config
	Hostname string
	Scheme string
}
// GetHostAddress returns the host address in the form "<scheme>://<hostname>",
// built from the Scheme and Hostname fields without any further checks.
func (f *WeaviateConfig) GetHostAddress() string {
	return f.Scheme + "://" + f.Hostname
}
// LoadConfig fills f.Config from the (deprecated) config file and from
// FromEnv, and validates the result.
//
// The config file path is taken from the *Flags stored in flags.Options; when
// it is empty DefaultConfigFile is used. Config files are optional: a file
// that cannot be read, or that is empty, is skipped and never fails the call.
// If the unreadable file was explicitly requested on the command line a
// warning is logged, so that a mistyped path does not go unnoticed. A
// non-empty file is parsed into f.Config and a deprecation notice is logged.
//
// Afterwards FromEnv is applied to f.Config and the Authentication,
// Authorization, Persistence, AutoSchema and ResourceUsage sections are
// validated in that order. Every error is returned through configErr.
//
// flags.Options must hold a *Flags (as created by GetConfigOptionGroup),
// otherwise the type assertion panics.
func (f *WeaviateConfig) LoadConfig(flags *swag.CommandLineOptionsGroup, logger logrus.FieldLogger) error {
	// Get command line flags
	configFileName := flags.Options.(*Flags).ConfigFile

	// Set default if not given
	explicitConfigFile := configFileName != ""
	if !explicitConfigFile {
		configFileName = DefaultConfigFile
	}

	// Read config file. A read failure is tolerated on purpose (the default
	// file usually does not exist), but it is reported if the user asked for
	// this particular file.
	file, err := os.ReadFile(configFileName)
	if err != nil && explicitConfigFile {
		logger.WithField("action", "config_load").
			WithField("config_file_path", configFileName).
			WithError(err).
			Warn("could not read the config file passed via --config-file, continuing without it")
	}

	if len(file) > 0 {
		logger.WithField("action", "config_load").WithField("config_file_path", configFileName).
			Info("Usage of the weaviate.conf.json file is deprecated and will be removed in the future. Please use environment variables.")

		config, err := f.parseConfigFile(file, configFileName)
		if err != nil {
			return configErr(err)
		}
		f.Config = config

		deprecations.Log(logger, "config-files")
	}

	if err := FromEnv(&f.Config); err != nil {
		return configErr(err)
	}

	if err := f.Config.Authentication.Validate(); err != nil {
		return configErr(err)
	}

	if err := f.Config.Authorization.Validate(); err != nil {
		return configErr(err)
	}

	if err := f.Config.Persistence.Validate(); err != nil {
		return configErr(err)
	}

	if err := f.Config.AutoSchema.Validate(); err != nil {
		return configErr(err)
	}

	if err := f.Config.ResourceUsage.Validate(); err != nil {
		return configErr(err)
	}

	return nil
}
// configFileExtensionPattern captures the extension of a config file name. It
// only matches when everything after the last dot consists of word
// characters. Compiled once instead of on every parseConfigFile call.
var configFileExtensionPattern = regexp.MustCompile(`.*\.(\w+)$`)

// parseConfigFile decodes the content of a config file into a Config. The
// decoder is picked by the extension of name: "json" or "yaml".
//
// An error is returned when name has no recognizable extension, when the
// extension is not supported, or when decoding fails. Decoder errors are
// wrapped, so callers can inspect them with errors.Is / errors.As. On error
// the returned Config is always the zero value.
func (f *WeaviateConfig) parseConfigFile(file []byte, name string) (Config, error) {
	m := configFileExtensionPattern.FindStringSubmatch(name)
	if len(m) < 2 {
		return Config{}, fmt.Errorf("config file does not have a file ending, got '%s'", name)
	}

	var config Config

	switch m[1] {
	case "json":
		if err := json.Unmarshal(file, &config); err != nil {
			return Config{}, fmt.Errorf("error unmarshalling the json config file: %w", err)
		}
	case "yaml":
		if err := yaml.Unmarshal(file, &config); err != nil {
			return Config{}, fmt.Errorf("error unmarshalling the yaml config file: %w", err)
		}
	default:
		return Config{}, fmt.Errorf("unsupported config file extension '%s', use .yaml or .json", m[1])
	}

	return config, nil
}
// configErr prefixes err with "invalid config: ". The cause is wrapped rather
// than flattened into a string, so it stays reachable through errors.Is,
// errors.As and errors.Unwrap. err is expected to be non-nil.
func configErr(err error) error {
	return fmt.Errorf("invalid config: %w", err)
}