MVPilgrim
restore
6b502ec
raw
history blame
4.73 kB
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: [email protected]
//
package modtransformers
import (
"context"
"testing"
"github.com/sirupsen/logrus"
ltest "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/weaviate/weaviate/entities/models"
"github.com/weaviate/weaviate/entities/schema"
)
func TestConfigDefaults(t *testing.T) {
t.Run("for properties", func(t *testing.T) {
def := New().ClassConfigDefaults()
assert.Equal(t, true, def["vectorizeClassName"])
assert.Equal(t, "masked_mean", def["poolingStrategy"])
})
t.Run("for the class", func(t *testing.T) {
dt := schema.DataTypeText
def := New().PropertyConfigDefaults(&dt)
assert.Equal(t, false, def["vectorizePropertyName"])
assert.Equal(t, false, def["skip"])
})
}
func TestConfigValidator(t *testing.T) {
t.Run("all usable props no-indexed", func(t *testing.T) {
t.Run("all schema vectorization turned off", func(t *testing.T) {
class := &models.Class{
Vectorizer: "text2vec-contextionary",
Class: "ValidName",
Properties: []*models.Property{
{
DataType: []string{"text"},
Name: "description",
},
{
DataType: schema.DataTypeText.PropString(),
Tokenization: models.PropertyTokenizationWhitespace,
Name: "name",
},
{
DataType: []string{"int"},
Name: "amount",
},
},
}
logger, _ := ltest.NewNullLogger()
v := NewConfigValidator(logger)
err := v.Do(context.Background(), class, nil, &fakeIndexChecker{
vectorizePropertyName: false,
vectorizeClassName: false,
propertyIndexed: false,
})
assert.NotNil(t, err)
})
})
}
func TestConfigValidator_RiskOfDuplicateVectors(t *testing.T) {
type test struct {
name string
in *models.Class
expectWarning bool
indexChecker *fakeIndexChecker
}
tests := []test{
{
name: "usable properties",
in: &models.Class{
Class: "ValidName",
Properties: []*models.Property{
{
DataType: []string{string(schema.DataTypeText)},
Name: "textProp",
},
},
},
expectWarning: false,
indexChecker: &fakeIndexChecker{
vectorizePropertyName: false,
vectorizeClassName: true,
propertyIndexed: true,
},
},
{
name: "no properties",
in: &models.Class{
Class: "ValidName",
},
expectWarning: true,
indexChecker: &fakeIndexChecker{
vectorizePropertyName: false,
vectorizeClassName: true,
propertyIndexed: false,
},
},
{
name: "usable properties, but they are no-indexed",
in: &models.Class{
Class: "ValidName",
Properties: []*models.Property{
{
DataType: []string{string(schema.DataTypeText)},
Name: "textProp",
},
},
},
expectWarning: true,
indexChecker: &fakeIndexChecker{
vectorizePropertyName: false,
vectorizeClassName: true,
propertyIndexed: false,
},
},
{
name: "only unusable properties",
in: &models.Class{
Class: "ValidName",
Properties: []*models.Property{
{
DataType: []string{string(schema.DataTypeInt)},
Name: "intProp",
},
},
},
expectWarning: true,
indexChecker: &fakeIndexChecker{
vectorizePropertyName: false,
vectorizeClassName: true,
propertyIndexed: false,
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
logger, hook := ltest.NewNullLogger()
v := NewConfigValidator(logger)
err := v.Do(context.Background(), test.in, nil, test.indexChecker)
require.Nil(t, err)
entry := hook.LastEntry()
if test.expectWarning {
require.NotNil(t, entry)
assert.Equal(t, logrus.WarnLevel, entry.Level)
} else {
assert.Nil(t, entry)
}
})
}
}
type fakeIndexChecker struct {
vectorizeClassName bool
vectorizePropertyName bool
propertyIndexed bool
}
func (f *fakeIndexChecker) VectorizeClassName() bool {
return f.vectorizeClassName
}
func (f *fakeIndexChecker) VectorizePropertyName(propName string) bool {
return f.vectorizePropertyName
}
func (f *fakeIndexChecker) PropertyIndexed(propName string) bool {
return f.propertyIndexed
}